# Imports assumed by this snippet (Python 3 names; LoginError and
# UnrestrictionError are project-specific exceptions defined elsewhere).
from http.cookiejar import MozillaCookieJar
from json import load
from os import path
from urllib.parse import urlencode
from urllib.request import build_opener, HTTPCookieProcessor


class RDWorker:
    """
    Worker class to perform Real-Debrid related actions:
    - format login info so it can be used by Real-Debrid
    - login
    - unrestrict links
    - keep cookies
    """
    _endpoint = 'http://www.real-debrid.com/ajax/%s'

    def __init__(self, cookie_file):
        self._cookie_file = cookie_file
        self.cookies = MozillaCookieJar(self._cookie_file)

    def login(self, username, password_hash):
        """
        Log into Real-Debrid. password_hash must be an MD5 hash of the password string.
        :param username:
        :param password_hash:
        :return:
        :raise:
        """
        if path.isfile(self._cookie_file):
            self.cookies.load(self._cookie_file)
            for cookie in self.cookies:
                if cookie.name == 'auth' and not cookie.is_expired():
                    return  # no need for a new cookie

        # Request a new cookie if no valid cookie is found or if it has expired.
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        try:
            response = opener.open(self._endpoint % 'login.php?%s'
                                   % urlencode({'user': username, 'pass': password_hash}))
            resp = load(response)
            opener.close()
            if resp['error'] == 0:
                self.cookies.save(self._cookie_file)
            else:
                raise LoginError(resp['message'].encode('utf-8'), resp['error'])
        except Exception as e:
            raise Exception('Login failed: %s' % str(e))

    def unrestrict(self, link, password=''):
        """
        Unrestrict a download URL. Returns a tuple of the unrestricted URL and the filename.
        :param link: URL to unrestrict
        :param password: password to use for the unrestriction
        :return:
        :raise:
        """
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        response = opener.open(self._endpoint % 'unrestrict.php?%s'
                               % urlencode({'link': link, 'password': password}))
        resp = load(response)
        opener.close()
        if resp['error'] == 0:
            info = resp['generated_links'][0]
            return info[2], info[0].replace('/', '_')
        else:
            raise UnrestrictionError(resp['message'].encode('utf-8'), resp['error'])
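# Usage sketch for the RDWorker above (an assumption, not part of the original
# snippet): log in with an MD5 hash of the password, then unrestrict a link.
# The credentials, cookie path and hoster URL below are placeholders.
import hashlib

worker = RDWorker('rd_cookies.txt')
password_hash = hashlib.md5('my-password'.encode('utf-8')).hexdigest()
worker.login('my-user', password_hash)
url, filename = worker.unrestrict('http://example-hoster.com/file/123')
print(url, filename)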
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Login status codes:
       online: 400  invisible: 0  busy: 600  away: 100
    '''
    if cachefile:
        self.cache = Cache(cachefile)
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile

    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Make sure the cookies file exists and starts with the Netscape header,
    # otherwise MozillaCookieJar.load() refuses to read it.
    try:
        f = open(cookiesfile)
    except IOError:
        f = open(cookiesfile, 'w')
        f.write(MozillaCookieJar.header)
    finally:
        f.close()
    cookiejar.load(filename=cookiesfile)

    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password

    if not self.alive():
        if self._login():
            cookiejar.save()
    self.changestatus(status)
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Login status codes:
       online: 400  invisible: 0  busy: 600  away: 100
    '''
    if cachefile:
        self.cache = Cache(cachefile)
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile

    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Seed the cookies file with the Netscape header on first run,
    # so that load() does not fail on a missing or empty file.
    if not os.path.isfile(cookiesfile):
        open(cookiesfile, 'w').write(MozillaCookieJar.header)
    cookiejar.load(filename=cookiesfile)

    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password

    if not self.alive():
        self._login()
        cookiejar.save()
    self.changestatus(status)
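# Minimal standalone sketch of the bootstrap pattern used in the two snippets
# above (assumes Python 3 names; 'cookies.txt' is a placeholder path):
# MozillaCookieJar.load() raises LoadError on a file that lacks the Netscape
# header line, so the file is seeded with MozillaCookieJar.header before the
# first load.
import os
from http.cookiejar import MozillaCookieJar
from urllib.request import build_opener, HTTPCookieProcessor

cookiesfile = 'cookies.txt'
jar = MozillaCookieJar(filename=cookiesfile)
if not os.path.isfile(cookiesfile):
    with open(cookiesfile, 'w') as f:
        f.write(MozillaCookieJar.header)  # "# Netscape HTTP Cookie File ..." magic line
jar.load(cookiesfile)
opener = build_opener(HTTPCookieProcessor(jar))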
def _get_cookie_headers(cls):
    jar = MozillaCookieJar(config.netflix.cookies_path)
    jar.load()
    cookies = []
    for cookie in jar:
        cookies.append('='.join((cookie.name, cookie.value)))
    return cookies
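# Hypothetical follow-up (not in the original snippet): the name=value pairs
# returned by _get_cookie_headers() can be joined into a single Cookie request
# header. The values below are placeholders.
cookie_pairs = ['NetflixId=abc123', 'SecureNetflixId=def456']
headers = {'Cookie': '; '.join(cookie_pairs)}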
def GetWithCookie(url, cookie_name, data='', retry=3):
    global PATH_TMP, ACGINDEX_UA
    try:
        cj = MozillaCookieJar(PATH_TMP + cookie_name)
        try:
            cj.load(PATH_TMP + cookie_name)
        except:
            pass  # no cookies yet, just carry on
        ckproc = urllib2.HTTPCookieProcessor(cj)
        AmagamiSS = urllib2.build_opener(ckproc)
        AmagamiSS.addheaders = [ACGINDEX_UA]

        if data != '':
            request = urllib2.Request(url=url, data=data)
            res = AmagamiSS.open(request)
            cj.save()  # only save newly obtained cookies after a POST
        else:
            res = AmagamiSS.open(url)

        return Haruka.GetContent(res)
    except:
        # Up to 3 reconnection attempts; give up after 3 timeouts
        if retry > 0:
            return Haruka.GetWithCookie(url, cookie_name, data, retry - 1)
        else:
            return False
class WebBrowser(object):
    '''Keeps cookies in memory, emulating a web browser.
    Note: it does not currently execute JavaScript.'''

    def __init__(self, uAgent=None, headers=None):
        '''uAgent is the user-agent string.'''
        self.cookie_j = MozillaCookieJar()
        if uAgent is None:
            uAgent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/32.0.1700.107 Safari/537.36')
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_j))
        self.user_agent = uAgent
        self.opener.addheaders = [('User-Agent', self.user_agent)]
        self.timeout = 25
        socket.setdefaulttimeout(self.timeout)

    def newtree(f):
        # Decorator: parse the fetched response into an lxml HTML tree.
        return lambda *a, **k: etree.parse(f(*a, **k), parser=etree.HTMLParser())

    @newtree
    def fetch(self, url, data=None, headers=None, method='POST'):
        '''Fetches the contents of the web page given in url.
        To send data via POST, pass it (already encoded) in data.'''
        if headers:
            self.opener.addheaders = headers
        if not (data is None or isinstance(data, str)):
            data = urllib.urlencode(data)
        if method == 'POST':
            self.last_seen = self.opener.open(url, data)
        elif method == 'GET':
            if data is None:
                self.last_seen = self.opener.open(url)
            else:
                self.last_seen = self.opener.open(url + '?' + data)
        else:
            raise ValueError('Unsupported HTTP method: %s' % method)
        return self.last_seen

    def geturl(self):
        return self.last_seen.geturl()

    def save_cookies(self, path):
        '''Saves the in-memory cookies to disk; path is the cookie file.'''
        self.cookie_j.save(path, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, path):
        '''Loads cookies from disk into memory; path is the cookie file.'''
        self.cookie_j.load(path, ignore_discard=True, ignore_expires=True)

    def print_cookies(self):
        for cookie in self.cookie_j:
            print cookie.name, cookie.value
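# Usage sketch for the WebBrowser class above (assumption: Python 2, since the
# class uses urllib and print statements; 'example_cookies.txt' is a placeholder).
browser = WebBrowser()
tree = browser.fetch('http://example.com/', method='GET')  # lxml tree via @newtree
print browser.geturl()
browser.save_cookies('example_cookies.txt')

# Later, restore the same session in a fresh instance:
browser2 = WebBrowser()
browser2.load_cookies('example_cookies.txt')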
def LIVE(url, relogin=False):
    if not (settings['username'] and settings['password']):
        # "Error" / "Please set up a moja.markiza.sk account"
        xbmcgui.Dialog().ok('Chyba', 'Nastavte prosím moja.markiza.sk konto', '', '')
        xbmcplugin.setResolvedUrl(int(sys.argv[1]), False, xbmcgui.ListItem())
        raise RuntimeError

    cj = MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

    if not relogin:
        try:
            cj.load(cookiepath)
        except IOError:
            relogin = True

    if relogin:
        response = opener.open(loginurl).read()
        token = re.search(r'name=\"_token_\" value=\"(\S+?)\">', response).group(1)
        logindata = urllib.urlencode({
            'email': settings['username'],
            'password': settings['password'],
            '_token_': token,
            '_do': 'content1-loginForm-form-submit'
        }) + '&login=Prihl%C3%A1si%C5%A5+sa'
        opener.open(loginurl, logindata)
        log('Saving cookies')
        cj.save(cookiepath)

    response = opener.open(url).read()
    # e.g. https://videoarchiv.markiza.sk/api/v1/user/live
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(1)
    link = link.replace('&amp;', '&')
    response = opener.open(link).read()

    if '<iframe src=\"' not in response:
        # Handle expired cookies
        if relogin:
            # Loop protection: we already re-logged in once
            # "Error" / "Check your login credentials"
            xbmcgui.Dialog().ok('Chyba', 'Skontrolujte prihlasovacie údaje', '', '')
            raise RuntimeError
        else:
            LIVE(url, relogin=True)
            return

    opener.addheaders = [('Referer', link)]
    # e.g. https://media.cms.markiza.sk/embed/
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(1)
    response = opener.open(link).read()

    if '<title>Error</title>' in response:
        # e.g. "Video nie je dostupné vo vašej krajine" (video not available in your country)
        error = re.search('<h2 class="e-title">(.*?)</h2>', response).group(1)
        xbmcgui.Dialog().ok('Chyba', error, '', '')
        raise RuntimeError

    # e.g. https://h1-s6.c.markiza.sk/hls/markiza-sd-master.m3u8
    link = re.search(r'\"hls\": \"(\S+?)\"', response).group(1)
    response = opener.open(link).read()

    cookies = '|Cookie='
    for cookie in cj:
        cookies += cookie.name + '=' + cookie.value + ';'
    cookies = cookies[:-1]

    play_item = xbmcgui.ListItem(path=link + cookies)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=play_item)
def main(*args):
    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-u", "--username",
                    help="the username to login as.")
    optp.add_option("-d", "--storedir", dest="store_dir",
                    help="the directory to store the certificate/key and "
                         "config file",
                    metavar="DIR",
                    default=path.join(homedir, ".shibboleth"))
    optp.add_option("-i", "--idp",
                    help="unique ID of the IdP used to log in")
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
                    help="Increase verbosity (specify multiple times for more)")

    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all is not well.
    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable default format.
    logging.basicConfig(level=log_level)

    if not args:
        optp.print_help()
        return

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    sp = args[0]
    idp = Idp(opts.idp)
    c = CredentialManager()
    if opts.username:
        c.username = opts.username

    # If the cookies file exists, load it.
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    shibboleth = Shibboleth(idp, c, cj)
    shibboleth.openurl(sp)
    print("Successfully authenticated to %s" % sp)
    cj.save()
def main(*args):
    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-d", "--storedir", dest="store_dir",
                    help="the directory to store the certificate/key and "
                         "config file",
                    metavar="DIR",
                    default=path.join(homedir, ".shibboleth"))
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
                    help="Increase verbosity (specify multiple times for more)")

    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all is not well.
    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable
    # default format.
    logging.basicConfig(level=log_level)

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    if args:
        sp = args[0]

    # If the cookies file exists, load it.
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    # Collect the Shibboleth session logout endpoints from the stored cookies.
    logout_urls = []
    for cookie in cj:
        if cookie.name.startswith('_shibsession_') or \
           cookie.name.startswith('_shibstate_'):
            logout_urls.append(
                "https://%s/Shibboleth.sso/Logout" % cookie.domain)
    logout_urls = list(set(logout_urls))

    opener = urllib2.build_opener(HTTPCookieProcessor(cookiejar=cj))
    for url in logout_urls:
        request = urllib2.Request(url)
        log.debug("GET: %s" % request.get_full_url())
        response = opener.open(request)

    cj.save()
def load_cookies3():
    """
    Load cookies: cookies.txt -> load()  (MozillaCookieJar format)
    """
    save_to_txt()
    cj = MozillaCookieJar()
    # Both flags must be True here, otherwise the login fails.
    cj.load('localCookiesMoz.txt', ignore_discard=True, ignore_expires=True)
    for index, cookie in enumerate(cj):  # show the cookies
        print('[', index, ']', cookie)
    return cj
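# Follow-up sketch (an assumption, not from the original snippet): wire the jar
# returned by load_cookies3() into an opener so subsequent requests send those
# cookies automatically.
from urllib.request import build_opener, HTTPCookieProcessor, install_opener

cj = load_cookies3()
opener = build_opener(HTTPCookieProcessor(cj))
install_opener(opener)  # urlopen() will now carry the loaded cookies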
def Get(url, data='', refer='http://www.pixiv.net/', retry=3):
    global ABS_PATH
    cj = MozillaCookieJar(ABS_PATH + 'pixiv.cookie.txt')
    try:
        cj.load(ABS_PATH + 'pixiv.cookie.txt')
    except:
        pass  # no cookies yet, just carry on

    ckproc = urllib2.HTTPCookieProcessor(cj)
    opener = urllib2.build_opener(ckproc)
    opener.addheaders = [
        ('Accept', '*/*'),
        ('Accept-Language', 'zh-CN,zh;q=0.8'),
        ('Accept-Charset', 'UTF-8,*;q=0.5'),
        ('Accept-Encoding', 'gzip,deflate'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31'),
        ('Referer', refer)
    ]

    # Prevent weibo from switching to the English version for overseas visitors
    if 'weibo.com' in url:
        opener.addheaders = [('Cookie', 'lang=zh-cn; SUB=Af3TZPWScES9bnItTjr2Ahd5zd6Niw2rzxab0hB4mX3uLwL2MikEk1FZIrAi5RvgAfCWhPyBL4jbuHRggucLT4hUQowTTAZ0ta7TYSBaNttSmZr6c7UIFYgtxRirRyJ6Ww%3D%3D; UV5PAGE=usr512_114; UV5=usrmdins311164')]

    debug('Network: url - ' + url)
    try:
        # Send the request
        if data != '':
            debug('Network: post')
            debug(data)
            request = urllib2.Request(url=url, data=data)
            res = opener.open(request, timeout=15)
            cj.save()  # only save newly obtained cookies after a POST
        else:
            debug('Network: get')
            res = opener.open(url, timeout=15)
        debug('Network: Status Code - ' + str(res.getcode()))
        return GetContent(res)
    except Exception, e:
        # Retry automatically, at most 3 times per image
        if retry > 0:
            return Get(url, data, refer, retry - 1)
        else:
            log(e, 'Error: unable to get %s' % url)
            return False
class NRK:
    def __init__(self):
        policy = DefaultCookiePolicy(
            rfc2965=True,
            strict_ns_domain=DefaultCookiePolicy.DomainStrict)
        self.cj = MozillaCookieJar(".cookies", policy)
        try:
            self.cj.load()
        except IOError, e:
            if e.errno != 2:
                raise e
            # else: ignore "File not found"
        self.opener = build_opener(HTTPCookieProcessor(self.cj))
        self.init()
        #self.login()
        self.setspeed()
def login_test(self, provider):
    with self.app.test_request_context('https://localhost.admin.eutaxia.eu:5000/login',
                                       base_url='https://localhost.admin.eutaxia.eu:5000/'):
        resp = oauth.authorize(provider)
        assert resp.status_code == 302
        location = resp.headers['Location']
        session_data = dict(flask.session)

    cj = MozillaCookieJar(os.path.join(os.path.dirname(__file__),
                                       'cookies.%s.txt' % provider))
    cj.load()

    class NoRedirectHandler(HTTPRedirectHandler):
        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            if newurl.startswith('https://localhost.admin.eutaxia.eu:5000/login/%s' % provider):
                raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
            return HTTPRedirectHandler.redirect_request(self, req, fp, code, msg,
                                                        hdrs, newurl)

    opener = build_opener(HTTPCookieProcessor(cj), NoRedirectHandler())
    try:
        res = opener.open(location)
    except HTTPError as err:
        assert err.code == 302
        url = err.hdrs['Location']
        assert url.startswith('https://localhost.admin.eutaxia.eu:5000/login/%s' % provider)
    else:
        if provider == 'windowslive':
            # Unfortunately we can't configure Windows Live to accept two separate
            # redirect URLs
            return
        else:
            assert False, 'Wrong redirect'

    with self.app.test_client() as c:
        with c.session_transaction() as session:
            session.update(session_data)
        query_string = urlparse(url).query
        resp = c.get('/login/%s' % provider, query_string=query_string)
        assert resp.status_code == 666
def check_kilnauth_token(ui, url):
    cookiepath = _get_path('hgcookies')
    if (not os.path.exists(cookiepath)) or (not os.path.isdir(cookiepath)):
        return ''

    cookiepath = os.path.join(cookiepath,
                              md5(get_username(get_dest(ui))).hexdigest())
    try:
        if not os.path.exists(cookiepath):
            return ''
        cj = MozillaCookieJar(cookiepath)
    except IOError:
        return ''

    domain = get_domain(url)
    cj.load(ignore_discard=True, ignore_expires=True)
    for cookie in cj:
        if domain == cookie.domain:
            if cookie.name == 'fbToken':
                return cookie.value
def get_cookie(uname, pword, cookie_jar_path):
    print(cookie_jar_path)
    if os.path.isfile(cookie_jar_path):
        cookie_jar = MozillaCookieJar()
        cookie_jar.load(cookie_jar_path)

        # Make sure the cookie is still valid
        print('******************* FIRST COOKIE Check ******************')
        if check_cookie(cookie_jar):
            print(" > Re-using previous cookie jar.")
            print('******************* END FIRST COOKIE Check ******************')
            return cookie_jar
        else:
            print(" > Could not validate old cookie Jar")
            cookie_jar = get_new_cookie(uname, pword, cookie_jar_path)
            check_cookie(cookie_jar)
    else:
        print('Could not find existing cookie jar -- Creating a new one')
        cookie_jar = get_new_cookie(uname, pword, cookie_jar_path)
    return cookie_jar
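# Usage sketch for get_cookie() above (assumption: check_cookie() and
# get_new_cookie() are defined elsewhere in the same module; the credentials
# are placeholders, and the jar path mirrors the one used by the
# bulk_downloader class later in this collection).
import os

jar_path = os.path.join(os.path.expanduser('~'), '.bulk_download_cookiejar.txt')
cookie_jar = get_cookie('my_earthdata_user', 'my_password', jar_path)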
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20200110T101421_20200110T101446_019753_025598_C902-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191229T101421_20191229T101446_019578_025007_DB2A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191217T101422_20191217T101447_019403_024A73_D2A9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191205T101422_20191205T101447_019228_0244DD_9778-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191123T101423_20191123T101448_019053_023F55_95B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191111T101423_20191111T101448_018878_0239B4_3FCF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191030T101423_20191030T101448_018703_02340F_3D8D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191018T101423_20191018T101448_018528_022E97_0AEB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191006T101423_20191006T101448_018353_022937_B959-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190912T101422_20190912T101447_018003_021E50_B3FB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190831T101421_20190831T101446_017828_0218D8_1ADE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190819T101421_20190819T101446_017653_021365_B751-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190807T101420_20190807T101445_017478_020DEF_A757-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101514_20190801T101539_028374_0334DB_E6C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101449_20190801T101514_028374_0334DB_6CA1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190726T101419_20190726T101444_017303_0208A8_2D9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190714T101419_20190714T101444_017128_020394_A8B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190702T101418_20190702T101443_016953_01FE6B_BE7D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190620T101417_20190620T101442_016778_01F93E_D609-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190608T101416_20190608T101441_016603_01F407_282F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190527T101416_20190527T101441_016428_01EECF_79D2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190515T101415_20190515T101440_016253_01E971_7A00-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190503T101415_20190503T101440_016078_01E3E6_D149-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190421T101414_20190421T101439_015903_01DE0C_E919-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190409T101414_20190409T101439_015728_01D843_E7B3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190328T101413_20190328T101438_015553_01D27A_7404-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190316T101413_20190316T101438_015378_01CCBE_781F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190304T101413_20190304T101438_015203_01C713_17EF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190220T101413_20190220T101438_015028_01C151_EA49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190208T101413_20190208T101438_014853_01BB8C_940D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190127T101414_20190127T101439_014678_01B5D2_3B0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190115T101414_20190115T101439_014503_01B03A_4439-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190103T101414_20190103T101439_014328_01AA92_7D9B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181222T101415_20181222T101440_014153_01A4CF_3F05-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181210T101415_20181210T101440_013978_019F03_1C29-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181128T101416_20181128T101441_013803_01995A_6DD3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181116T101416_20181116T101441_013628_0193C1_FE12-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181104T101416_20181104T101441_013453_018E4D_0014-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101420_20181023T101445_013278_0188CC_5952-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101355_20181023T101420_013278_0188CC_0FA6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181011T101417_20181011T101442_013103_01835D_D0A0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180929T101416_20180929T101441_012928_017E0F_226F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180917T101416_20180917T101441_012753_0178B3_B66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180905T101415_20180905T101440_012578_017358_3259-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180824T101415_20180824T101440_012403_016DE5_85C3-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180812T101414_20180812T101439_012228_01687D_BCA9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180731T101414_20180731T101439_012053_01631A_ADBC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180719T101413_20180719T101438_011878_015DD1_3E69-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180707T101412_20180707T101437_011703_015872_5055-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180625T101411_20180625T101436_011528_015300_5709-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180613T101411_20180613T101436_011353_014D8E_1799-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180601T101410_20180601T101435_011178_014821_B178-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180520T101409_20180520T101434_011003_014273_5667-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180508T101408_20180508T101433_010828_013CCB_18C3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180426T101408_20180426T101433_010653_013720_457C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180414T101407_20180414T101432_010478_01318E_FB0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180402T101407_20180402T101432_010303_012BEA_9E94-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180321T101406_20180321T101431_010128_012640_6D69-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180309T101406_20180309T101431_009953_01208C_4F7B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180225T101406_20180225T101431_009778_011AAD_2181-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180213T101407_20180213T101432_009603_0114ED_D868-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180201T101407_20180201T101432_009428_010F21_C8FA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180120T101407_20180120T101432_009253_010968_4DBE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180108T101408_20180108T101433_009078_0103B1_EB1D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171227T101408_20171227T101433_008903_00FDFF_A4F1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171215T101409_20171215T101434_008728_00F863_906F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171203T101409_20171203T101434_008553_00F2D6_B8D7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171121T101409_20171121T101434_008378_00ED57_D6D0-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171028T101410_20171028T101435_008028_00E2F3_6DFC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171016T101410_20171016T101435_007853_00DDE1_829D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171004T101409_20171004T101434_007678_00D8F4_5C9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170922T101409_20170922T101434_007503_00D3F7_9FEC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170910T101409_20170910T101434_007328_00CED7_2D8E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170829T101408_20170829T101433_007153_00C9B8_96C9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170817T101408_20170817T101433_006978_00C4A9_5D92-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170805T101407_20170805T101432_006803_00BF8B_4F73-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170724T101407_20170724T101432_006628_00BA88_2017-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170712T101406_20170712T101431_006453_00B58B_7674-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170630T101405_20170630T101430_006278_00B098_CAC7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170618T101404_20170618T101429_006103_00AB89_7D52-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170606T101404_20170606T101429_005928_00A666_6411-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170525T101403_20170525T101428_005753_00A14F_A827-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170513T101402_20170513T101427_005578_009C52_4E38-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170501T101402_20170501T101427_005403_009788_B5E9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170419T101401_20170419T101426_005228_009270_5637-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170407T101401_20170407T101426_005053_008D67_BB68-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170326T101400_20170326T101425_004878_008859_36E8-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170314T101400_20170314T101425_004703_008359_7A42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170302T101400_20170302T101425_004528_007E2B_F8A2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170206T101400_20170206T101425_004178_0073C4_69B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170113T101401_20170113T101426_003828_00695B_0B49-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20161220T101403_20161220T101428_003478_005F1B_E6DD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161126T101403_20161126T101428_003128_005520_5BBB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161102T101404_20161102T101429_002778_004B45_F931-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161009T101404_20161009T101429_002428_00419A_2FD0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20160927T101404_20160927T101429_002253_003CAB_BC6E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101423_20160909T101448_012974_01487C_40C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101448_20160909T101513_012974_01487C_E55B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160816T101434_20160816T101503_012624_013CE1_AA51-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160723T101444_20160723T101513_012274_013152_9A67-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101426_20160629T101455_011924_0125E4_7F71-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101455_20160629T101520_011924_0125E4_D66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160605T101440_20160605T101505_011574_011AE7_0C49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160512T101439_20160512T101504_011224_010F91_FCE1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160418T101435_20160418T101500_010874_01048F_89B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160325T101434_20160325T101459_010524_00FA2B_4EAD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160301T101434_20160301T101459_010174_00F035_3B54-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101440_20160206T101505_009824_00E617_C31E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101415_20160206T101440_009824_00E617_D79A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160113T101434_20160113T101459_009474_00DBEE_5DBA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151220T101435_20151220T101500_009124_00D1EE_CFED-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151102T101442_20151102T101507_008424_00BE79_E2E7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151009T101442_20151009T101507_008074_00B50C_12FD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150915T101441_20150915T101506_007724_00ABB3_226C-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150822T101441_20150822T101506_007374_00A234_599D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150729T101439_20150729T101504_007024_0098B2_E48E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150705T101438_20150705T101503_006674_008EB2_3496-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101442_20150518T101507_005974_007B3F_EFEC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101417_20150518T101442_005974_007B3F_DF42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150424T101426_20150424T101451_005624_00734A_AD5A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101444_20150331T101509_005274_006AB8_213D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101419_20150331T101444_005274_006AB8_EBF3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101444_20150307T101509_004924_006269_7919-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101419_20150307T101444_004924_006269_9089-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101443_20150211T101508_004574_005A0A_8FEE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101418_20150211T101443_004574_005A0A_4D0C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101444_20150118T101509_004224_00522A_42D5-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101419_20150118T101444_004224_00522A_26FE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101439_20141225T101504_003874_004A4B_D1FC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101414_20141225T101439_003874_004A4B_367E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101440_20141201T101505_003524_004254_556F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101415_20141201T101440_003524_004254_5C25-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101441_20141107T101506_003174_003A78_745C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101416_20141107T101441_003174_003A78_5D83-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101419_20141014T101444_002824_0032F1_B89E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101444_20141014T101509_002824_0032F1_B54D-PREDORB-10m-power-filt-rtc-gamma.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar 
= None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." .format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! 
return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! 
print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. 
\n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( "--------------------------------------------------------------------------------" )
class Bilibili(): name = u'哔哩哔哩 (Bilibili)' api_url = 'http://interface.bilibili.com/playurl?' bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?' SEC1 = '94aba54af9065f71de72f5508f1cd42e' SEC2 = '9b288147e5474dd2aa67085f716c560d' supported_stream_profile = [u'流畅', u'高清', u'超清'] stream_types = [{ 'id': 'hdflv' }, { 'id': 'flv' }, { 'id': 'hdmp4' }, { 'id': 'mp4' }, { 'id': 'live' }, { 'id': 'vc' }] fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1) def __init__(self, appkey=APPKEY, appsecret=APPSECRET, width=720, height=480): self.defaultHeader = {'Referer': 'http://www.bilibili.com'} #self.defaultHeader = {} self.appkey = appkey self.appsecret = appsecret self.WIDTH = width self.HEIGHT = height self.is_login = False cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie' self.cj = MozillaCookieJar(cookie_path) if os.path.isfile(cookie_path): self.cj.load() key = None for ck in self.cj: if ck.name == 'DedeUserID': key = ck.value break if key is not None: self.is_login = True self.mid = str(key) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj)) urllib2.install_opener(opener) try: os.remove(self._get_tmp_dir() + '/tmp.ass') except: pass def _get_tmp_dir(self): try: return tempfile.gettempdir() except: return '' def get_captcha(self, path=None): key = None for ck in self.cj: if ck.name == 'sid': key = ck.value break if key is None: get_html( LOGIN_CAPTCHA_URL.format(random()), headers={'Referer': 'https://passport.bilibili.com/login'}) result = get_html( LOGIN_CAPTCHA_URL.format(random()), decoded=False, headers={'Referer': 'https://passport.bilibili.com/login'}) if path is None: path = tempfile.gettempdir() + '/captcha.jpg' with open(path, 'wb') as f: f.write(result) return path def get_encryped_pwd(self, pwd): import rsa result = loads( get_html( LOGIN_HASH_URL.format(random()), headers={'Referer': 'https://passport.bilibili.com/login'})) pwd = result['hash'] + pwd key = result['key'] pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key) pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key) pwd = base64.b64encode(pwd) pwd = urllib.quote(pwd) return pwd def api_sign(self, params): params['appkey'] = self.appkey data = '' keys = params.keys() # must sorted. 
urllib.urlencode(params) doesn't work keys.sort() for key in keys: data += '{}={}&'.format(key, urllib.quote(str(params[key]))) data = data[:-1] # remove last '&' if self.appsecret is None: return data m = hashlib.md5() m.update(data + self.appsecret) return data + '&sign=' + m.hexdigest() def get_category_from_web_page(self): category_dict = {'0': {'title': u'全部', 'url': HOME_URL}} node = category_dict['0'] url = node['url'] result = BeautifulSoup(get_html(url), "html.parser").findAll('li', {'class': 'm-i'}) for item in result: if len(item['class']) != 1: continue tid = item['data-tid'] title = item.em.contents[0] url = 'http:' + item.a['href'] category_dict[tid] = {'title': title, 'url': url} node['subs'].append(tid) #Fix video and movie if '11' not in category_dict['0']['subs']: category_dict['0']['subs'].append('11') if '23' not in category_dict['0']['subs']: category_dict['0']['subs'].append('23') category_dict['11'] = { 'title': u'电视剧', 'url': 'http://bangumi.bilibili.com/tv/' } category_dict['23'] = { 'title': u'电影', 'url': 'http://bangumi.bilibili.com/movie/' } for sub in category_dict['0']['subs']: node = category_dict[sub] url = node['url'] result = BeautifulSoup(get_html(url), "html.parser").select('ul.n_num li') for item in result[1:]: if not item.has_attr('tid'): continue if not hasattr(item, 'a'): continue if item.has_attr('class'): continue tid = item['tid'] title = item.a.contents[0] if item.a['href'][:2] == '//': url = 'http:' + item.a['href'] else: url = HOME_URL + item.a['href'] category_dict[tid] = {'title': title, 'url': url} node['subs'].append(tid) return category_dict def get_category(self, tid='0'): items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url']}}] for sub in CATEGORY[tid]['subs']: items.append({sub: CATEGORY[sub]}) return items def get_category_name(self, tid): return CATEGORY[str(tid)]['title'] def get_order(self): return ORDER def get_category_by_tag(self, tag=0, tid=0, page=1, pagesize=20): if tag == 0: url = LIST_BY_ALL.format(tid, pagesize, page) else: url = LIST_BY_TAG.format(tag, tid, pagesize, page) results = loads(get_html(url)) return results def get_category_list(self, tid=0, order='default', days=30, page=1, pagesize=20): params = { 'tid': tid, 'order': order, 'days': days, 'page': page, 'pagesize': pagesize } url = LIST_URL.format(self.api_sign(params)) result = loads(get_html(url, headers=self.defaultHeader)) results = [] for i in range(pagesize): if result['list'].has_key(str(i)): results.append(result['list'][str(i)]) else: continue return results, result['pages'] def get_my_info(self): if self.is_login == False: return [] result = loads(get_html(MY_INFO_URL)) return result['data'] def get_bangumi_chase(self, page=1, pagesize=20): if self.is_login == False: return [] url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['result'], result['data']['pages'] def get_bangumi_detail(self, season_id): url = BANGUMI_SEASON_URL.format(season_id) result = get_html(url, headers=self.defaultHeader) if result[0] != '{': start = result.find('(') + 1 end = result.find(');') result = result[start:end] result = loads(result) return result['result'] def get_history(self, page=1, pagesize=20): if self.is_login == False: return [] url = HISTORY_URL.format(page, pagesize) result = loads(get_html(url, headers=self.defaultHeader)) if len(result['data']) >= int(pagesize): total_page = int(page) + 1 else: total_page = int(page) return result['data'], total_page def 
get_dynamic(self, page=1, pagesize=20): if self.is_login == False: return [] url = DYNAMIC_URL.format(pagesize, page) result = loads(get_html(url, headers=self.defaultHeader)) total_page = int( (result['data']['page']['count'] + pagesize - 1) / pagesize) return result['data']['feeds'], total_page def get_attention(self, page=1, pagesize=20): if self.is_login == False: return [] url = ATTENTION_URL.format(self.mid, page, pagesize) result = loads(get_html(url)) return result['data']['list'] def get_attention_video(self, mid, tid=0, page=1, pagesize=20): if self.is_login == False: return [] url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid) result = loads(get_html(url, headers=self.defaultHeader)) return result['data'], result['data']['pages'] def get_attention_channel(self, mid): if self.is_login == False: return [] url = ATTENTION_CHANNEL_URL.format(mid) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['list'] def get_fav_box(self): if self.is_login == False: return [] url = FAV_BOX_URL.format(self.mid) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['list'] def get_fav(self, fav_box, page=1, pagesize=20): if self.is_login == False: return [] url = FAV_URL.format(self.mid, page, pagesize, fav_box) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['vlist'], result['data']['pages'] def login(self, userid, pwd, captcha): #utils.get_html('http://www.bilibili.com') if self.is_login == True: return True, '' pwd = self.get_encryped_pwd(pwd) data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format( captcha, userid, pwd) result = get_html( LOGIN_URL, data, { 'Origin': 'https://passport.bilibili.com', 'Referer': 'https://passport.bilibili.com/login' }) key = None for ck in self.cj: if ck.name == 'DedeUserID': key = ck.value break if key is None: return False, LOGIN_ERROR_MAP[loads(result)['code']] self.cj.save() self.is_login = True self.mid = str(key) return True, '' def logout(self): self.cj.clear() self.cj.save() self.is_login = False def get_av_list_detail(self, aid, page=1, fav=0, pagesize=20): params = {'id': aid, 'page': page} if fav != 0: params['fav'] = fav url = VIEW_URL.format(self.api_sign(params)) result = loads(get_html(url, headers=self.defaultHeader)) results = [result] if (int(page) < result['pages']) and (pagesize > 1): results += self.get_av_list_detail(aid, int(page) + 1, fav, pagesize=pagesize - 1)[0] return results, result['pages'] def get_av_list(self, aid): url = AV_URL.format(aid) try: page = get_html(url) result = loads(page) except: result = {} return result # 调用niconvert生成弹幕的ass文件 def parse_subtitle(self, cid): page_full_url = COMMENT_URL.format(cid) website = create_website(page_full_url) if website is None: return '' else: text = website.ass_subtitles_text(font_name=u'黑体', font_size=24, resolution='%d:%d' % (self.WIDTH, self.HEIGHT), line_count=12, bottom_margin=0, tune_seconds=0) f = open(self._get_tmp_dir() + '/tmp.ass', 'w') f.write(text.encode('utf8')) f.close() return 'tmp.ass' def get_video_urls(self, cid): m = hashlib.md5() m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER)) url = INTERFACE_URL.format(str(cid), m.hexdigest()) doc = parseString(get_html(url)) urls = [] for durl in doc.getElementsByTagName('durl'): u = durl.getElementsByTagName('url')[0].firstChild.nodeValue if re.match(r'.*\.qqvideo\.tc\.qq\.com', url): re.sub(r'.*\.qqvideo\.tc', 'http://vsrc.store', u) urls.append(u) #urls.append(u + 
'|Referer={}'.format(urllib.quote('https://www.bilibili.com/'))) return urls def add_history(self, aid, cid): url = ADD_HISTORY_URL.format(str(cid), str(aid)) get_html(url) def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs): ts = str(int(time.time())) if not bangumi: params_str = 'cid={}&player=1&quality={}&ts={}'.format( cid, quality, ts) chksum = hashlib.md5(bytes(params_str + self.SEC1)).hexdigest() api_url = self.api_url + params_str + '&sign=' + chksum else: mod = 'movie' if bangumi_movie else 'bangumi' params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format( cid, mod, quality, ts) chksum = hashlib.md5(bytes(params_str + self.SEC2)).hexdigest() api_url = self.bangumi_api_url + params_str + '&sign=' + chksum return get_html(api_url) def download_by_vid(self, cid, bangumi, **kwargs): stream_id = kwargs.get('stream_id') if stream_id and stream_id in self.fmt2qlt: quality = stream_id else: quality = 'hdflv' if bangumi else 'flv' level = kwargs.get('level', 0) xml = self.api_req(cid, level, bangumi, **kwargs) doc = parseString(xml) urls = [] for durl in doc.getElementsByTagName('durl'): u = durl.getElementsByTagName('url')[0].firstChild.nodeValue #urls.append(u) urls.append( urllib.quote_plus(u + '|Referer=https://www.bilibili.com')) return urls def entry(self, **kwargs): # tencent player tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page) if tc_flashvars: tc_flashvars = tc_flashvars.group(1) if tc_flashvars is not None: self.out = True return qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) cid = re.search(r'cid=(\d+)', self.page).group(1) if cid is not None: return self.download_by_vid(cid, False, **kwargs) else: # flashvars? flashvars = re.search(r'flashvars="([^"]+)"', self.page).group(1) if flashvars is None: raise Exception('Unsupported page {}'.format(self.url)) param = flashvars.split('&')[0] t, cid = param.split('=') t = t.strip() cid = cid.strip() if t == 'vid': sina_download_by_vid(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) elif t == 'ykid': youku_download_by_vid(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) elif t == 'uid': tudou_download_by_id(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) else: raise NotImplementedError( 'Unknown flashvars {}'.format(flashvars)) return def movie_entry(self, **kwargs): patt = r"var\s*aid\s*=\s*'(\d+)'" aid = re.search(patt, self.page).group(1) page_list = loads( get_html( 'http://www.bilibili.com/widget/getPageList?aid={}'.format( aid))) # better ideas for bangumi_movie titles? 
self.title = page_list[0]['pagename'] return self.download_by_vid(page_list[0]['cid'], True, bangumi_movie=True, **kwargs) def get_video_from_url(self, url, **kwargs): self.url = url_locations(url) frag = urlparse(self.url).fragment # http://www.bilibili.com/video/av3141144/index_2.html#page=3 if frag: hit = re.search(r'page=(\d+)', frag) if hit is not None: page = hit.group(1) av_id = re.search(r'av(\d+)', self.url).group(1) self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format( av_id, page) self.page = get_html(self.url) if 'bangumi.bilibili.com/movie' in self.url: return self.movie_entry(**kwargs) elif 'bangumi.bilibili.com' in self.url: return self.bangumi_entry(**kwargs) elif 'live.bilibili.com' in self.url: return self.live_entry(**kwargs) elif 'vc.bilibili.com' in self.url: return self.vc_entry(**kwargs) else: return self.entry(**kwargs) def bangumi_entry(self, **kwargs): pass def live_entry(self, **kwargs): pass def vc_entry(self, **kwargs): pass
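# Hedged sketch (not part of the original snippet): the signing scheme used by
# api_sign() in the Bilibili class above, shown standalone. Keys are sorted,
# joined into a query string, and an md5 of (query + appsecret) is appended as
# `sign`. The appkey/appsecret values in the example call are placeholders.
import hashlib
import urllib

def sign_params(params, appkey, appsecret):
    params = dict(params, appkey=appkey)
    query = '&'.join('{}={}'.format(k, urllib.quote(str(params[k])))
                     for k in sorted(params))
    if appsecret is None:
        return query
    return query + '&sign=' + hashlib.md5(query + appsecret).hexdigest()

# Example: sign_params({'tid': 0, 'page': 1}, 'my_appkey', 'my_appsecret')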
class Session(requests.Session): """ Session for making API requests and interacting with the filesystem """ def __init__(self): super(Session, self).__init__() self.trust_env = False cookie_file = os.path.expanduser('~/.danabox/cookies.txt') cookie_dir = os.path.dirname(cookie_file) self.cookies = MozillaCookieJar(cookie_file) # Create the $HOME/.danabox dir if it doesn't exist if not os.path.isdir(cookie_dir): os.mkdir(cookie_dir, 0700) # Load existing cookies if the cookies.txt exists if os.path.isfile(cookie_file): self.cookies.load() self.cookies.clear_expired_cookies() def clear(self): """Clear cookies""" try: self.cookies.clear() self.cookies.save() except KeyError: pass def git_root(self): """ Return the absolute path from the git repository root If no git repository exists, raise an EnvironmentError """ try: git_root = subprocess.check_output( ['git', 'rev-parse', '--show-toplevel'], stderr=subprocess.PIPE).strip('\n') except subprocess.CalledProcessError: raise EnvironmentError('Current directory is not a git repository') return git_root def get_app(self): """ Return the application name for the current directory The application is determined by parsing `git remote -v` output for the origin remote. Because Danabox only allows deployment of public Github repos we can create unique app names from a combination of the Github user's name and the repo name. Eg; '[email protected]:opdemand/example-ruby-sinatra.git' becomes 'opdemand-example--ruby--sinatra' If no application is found, raise an EnvironmentError. """ git_root = self.git_root() remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root) if remotes is None: raise EnvironmentError('No git remotes found.') for remote in remotes.splitlines(): if 'github.com' in remote: url = remote.split()[1] break if url is None: raise EnvironmentError('No Github remotes found.') pieces = url.split('/') owner = pieces[-2].split(':')[-1] repo = pieces[-1].replace('.git', '') app_raw = owner + '/' + repo app_name = app_raw.replace('-', '--').replace('/', '-') return app_name app = property(get_app) def request(self, *args, **kwargs): """ Issue an HTTP request with proper cookie handling including `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>` """ for cookie in self.cookies: if cookie.name == 'csrftoken': if 'headers' in kwargs: kwargs['headers']['X-CSRFToken'] = cookie.value else: kwargs['headers'] = {'X-CSRFToken': cookie.value} break response = super(Session, self).request(*args, **kwargs) self.cookies.save() return response
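# Hedged sketch: the CSRF handling done by Session.request() above, shown with a
# plain requests.Session instead of the subclass. The pattern is simply "copy the
# csrftoken cookie into an X-CSRFToken header before writing requests"; the URL
# and form data in any call to this helper are placeholders.
import requests

def post_with_csrf(session, url, data):
    token = session.cookies.get('csrftoken')  # None if the cookie is absent
    headers = {'X-CSRFToken': token} if token else {}
    return session.post(url, data=data, headers=headers)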
# way it won't abort unless the user has configured it. https_handler = urllib2.HTTPSHandler if options.ca_certs: from caslib.validating_https import ValidatingHTTPSConnection class HTTPSConnection(ValidatingHTTPSConnection): ca_certs = options.ca_certs https_handler = HTTPSConnection.HTTPSHandler opener = urllib2.build_opener(https_handler) if options.cookiejar: cookiejar = MozillaCookieJar(os.path.expanduser(options.cookiejar)) try: cookiejar.load(ignore_discard=True) except IOError: pass opener.add_handler(urllib2.HTTPCookieProcessor(cookiejar=cookiejar)) if not options.verbose: logging.basicConfig(level=logging.WARNING) elif options.verbose == 1: logging.basicConfig(level=logging.INFO) else: logging.basicConfig(level=logging.DEBUG) userdb = NRAOUserDB(options.location, options.username, options.password, opener) for key in args: if options.query_by == DATABASE_ID: user = userdb.get_user_data(database_id=key)
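# Hedged sketch: the counterpart of the cookiejar.load(ignore_discard=True) call
# above. Session cookies are normally discarded, so they only survive across runs
# if both load() and save() are told to keep them. The filename is a placeholder.
from cookielib import MozillaCookieJar

jar = MozillaCookieJar('cookies.txt')
try:
    jar.load(ignore_discard=True)
except IOError:
    pass  # no cookie file yet
# ... perform authenticated requests through an opener that uses this jar ...
jar.save(ignore_discard=True)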
class Bilibili(): def __init__(self, appkey = APPKEY, appsecret = APPSECRET): self.appkey = appkey self.appsecret = appsecret self.is_login = False cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie' self.cj = MozillaCookieJar(cookie_path) if os.path.isfile(cookie_path): self.cj.load() if requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'): self.is_login = True self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID']) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj)) urllib2.install_opener(opener) def get_captcha(self, path = None): if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'): utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()), headers = {'Referer':'https://passport.bilibili.com/login'}) result = utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()), headers = {'Referer':'https://passport.bilibili.com/login'}) if path == None: path = tempfile.gettempdir() + '/captcha.jpg' with open(path, 'wb') as f: f.write(result) return path def get_encryped_pwd(self, pwd): import rsa result = json.loads(utils.get_page_content(LOGIN_HASH_URL.format(random.random()), headers={'Referer':'https://passport.bilibili.com/login'})) pwd = result['hash'] + pwd key = result['key'] pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key) pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key) pwd = base64.b64encode(pwd) pwd = urllib.quote(pwd) return pwd def api_sign(self, params): params['appkey']=self.appkey data = "" keys = params.keys() keys.sort() for key in keys: if data != "": data += "&" value = params[key] if type(value) == int: value = str(value) data += key + "=" + str(urllib.quote(value)) if self.appsecret == None: return data m = hashlib.md5() m.update(data + self.appsecret) return data + '&sign=' + m.hexdigest() def get_category_from_web_page(self): category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs':[]}} node = category_dict['0'] url = node['url'] result = BeautifulSoup(utils.get_page_content(url), "html.parser").findAll('li', {'class': 'm-i'}) for item in result: if len(item['class']) != 1: continue tid = item['data-tid'] title = item.em.contents[0] url = 'http:' + item.a['href'] category_dict[tid] = {'title': title, 'url': url, 'subs':[]} node['subs'].append(tid) #Fix video and movie if '11' not in category_dict['0']['subs']: category_dict['0']['subs'].append('11') if '23' not in category_dict['0']['subs']: category_dict['0']['subs'].append('23') category_dict['11'] = {'title': u'电视剧', 'url': 'http://bangumi.bilibili.com/tv/', 'subs': []} category_dict['23'] = {'title': u'电影', 'url': 'http://bangumi.bilibili.com/movie/', 'subs': []} for sub in category_dict['0']['subs']: node = category_dict[sub] url = node['url'] result = BeautifulSoup(utils.get_page_content(url), "html.parser").select('ul.n_num li') for item in result[1:]: if not item.has_attr('tid'): continue if not hasattr(item, 'a'): continue if item.has_attr('class'): continue tid = item['tid'] title = item.a.contents[0] if item.a['href'][:2] == '//': url = 'http:' + item.a['href'] else: url = HOME_URL + item.a['href'] category_dict[tid] = {'title': title, 'url': url, 'subs':[]} node['subs'].append(tid) return category_dict def get_category(self, tid = '0'): items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url'], 'subs': []}}] for sub in CATEGORY[tid]['subs']: items.append({sub: CATEGORY[sub]}) return items def get_category_name(self, tid): return CATEGORY[str(tid)]['title'] def get_order(self): return ORDER def 
get_category_list(self, tid = 0, order = 'default', days = 30, page = 1, pagesize = 10): params = {'tid': tid, 'order': order, 'days': days, 'page': page, 'pagesize': pagesize} url = LIST_URL.format(self.api_sign(params)) result = json.loads(utils.get_page_content(url)) results = [] for i in range(pagesize): if result['list'].has_key(str(i)): results.append(result['list'][str(i)]) else: break return results, result['pages'] def get_my_info(self): if self.is_login == False: return [] result = json.loads(utils.get_page_content(MY_INFO_URL)) return result['data'] def get_bangumi_chase(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize) result = json.loads(utils.get_page_content(url)) return result['data']['result'], result['data']['pages'] def get_bangumi_detail(self, season_id): url = BANGUMI_SEASON_URL.format(season_id) result = utils.get_page_content(url) if result[0] != '{': start = result.find('(') + 1 end = result.find(');') result = result[start:end] result = json.loads(result) return result['result'] def get_history(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = HISTORY_URL.format(page, pagesize) result = json.loads(utils.get_page_content(url)) if len(result['data']) >= int(pagesize): total_page = int(page) + 1 else: total_page = int(page) return result['data'], total_page def get_dynamic(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = DYNAMIC_URL.format(pagesize, page) result = json.loads(utils.get_page_content(url)) total_page = int((result['data']['page']['count'] + pagesize - 1) / pagesize) return result['data']['feeds'], total_page def get_attention(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = ATTENTION_URL.format(self.mid, page, pagesize) result = json.loads(utils.get_page_content(url)) return result['data']['list'], result['data']['pages'] def get_attention_video(self, mid, tid = 0, page = 1, pagesize = 10): if self.is_login == False: return [] url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid) result = json.loads(utils.get_page_content(url)) return result['data'], result['data']['pages'] def get_attention_channel(self, mid): if self.is_login == False: return [] url = ATTENTION_CHANNEL_URL.format(mid) result = json.loads(utils.get_page_content(url)) return result['data']['list'] def get_attention_channel_list(self, mid, cid, page = 1, pagesize = 10): if self.is_login == False: return [] url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize) result = json.loads(utils.get_page_content(url)) return result['data']['list'], result['data']['total'] def get_fav_box(self): if self.is_login == False: return [] url = FAV_BOX_URL.format(self.mid) result = json.loads(utils.get_page_content(url)) return result['data']['list'] def get_fav(self, fav_box, page = 1, pagesize = 10): if self.is_login == False: return [] url = FAV_URL.format(self.mid, page, pagesize, fav_box) result = json.loads(utils.get_page_content(url)) return result['data']['vlist'], result['data']['pages'] def login(self, userid, pwd, captcha): #utils.get_page_content('http://www.bilibili.com') if self.is_login == True: return True, '' pwd = self.get_encryped_pwd(pwd) data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(captcha, userid, pwd) result = utils.get_page_content(LOGIN_URL, data, {'Origin':'https://passport.bilibili.com', 'Referer':'https://passport.bilibili.com/login'}) if not 
requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'): return False, LOGIN_ERROR_MAP[json.loads(result)['code']] self.cj.save() self.is_login = True self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID']) return True, '' def logout(self): self.cj.clear() self.cj.save() self.is_login = False def get_av_list_detail(self, aid, page = 1, fav = 0, pagesize = 10): params = {'id': aid, 'page': page} if fav != 0: params['fav'] = fav url = VIEW_URL.format(self.api_sign(params)) result = json.loads(utils.get_page_content(url)) results = [result] if (int(page) < result['pages']) and (pagesize > 1): results += self.get_av_list_detail(aid, int(page) + 1, fav, pagesize = pagesize - 1)[0] return results, result['pages'] def get_av_list(self, aid): url = AV_URL.format(aid) result = json.loads(utils.get_page_content(url)) return result def get_video_urls(self, cid): m = hashlib.md5() m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER)) url = INTERFACE_URL.format(str(cid), m.hexdigest()) doc = minidom.parseString(utils.get_page_content(url)) urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')] urls = [url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url) else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url) for url in urls] return urls def add_history(self, aid, cid): url = ADD_HISTORY_URL.format(str(cid), str(aid)) utils.get_page_content(url)
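# Hedged sketch: the login-state check used in __init__ and login() above. A
# cookielib jar has no dict lookup, so requests.utils.dict_from_cookiejar() is
# used to find a cookie by name. The cookie name matches the snippet; the path
# passed in is whatever the caller uses.
import os
from cookielib import MozillaCookieJar
import requests

def load_login_state(cookie_path):
    cj = MozillaCookieJar(cookie_path)
    if os.path.isfile(cookie_path):
        cj.load()
    cookies = requests.utils.dict_from_cookiejar(cj)
    mid = cookies.get('DedeUserID')
    return cj, (mid is not None), mid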
class CookieWay: def __init__(self): self.cookiejar = MozillaCookieJar() def load(self, file="cookie.txt"): self.cookiejar.load(file, ignore_discard=True, ignore_expires=True) def save(self, file="cookie.txt"): self.cookiejar.save(file, ignore_discard=True, ignore_expires=True) def torequestscj(self, s): for item in self.cookiejar: cookiesobject = requests.cookies.create_cookie(domain=item.domain, name=item.name, value=item.value) s.cookies.set_cookie(cookiesobject) def toseleniumcj(self, driver): domains = [] for item in self.cookiejar: if item.domain not in domains: domains.append(item.domain) for i in range(len(domains)): if domains[i][0:1] == ".": domains[i] = domains[i][1:] domains = list(set(domains)) for item in domains: driver.get("https://" + item) for item2 in self.cookiejar: if item2.domain == item or item2.domain == "." + item: cookie_dict = { 'domain': item2.domain, 'name': item2.name, 'value': item2.value, 'secure': item2.secure } if item2.path_specified: cookie_dict['path'] = item2.path driver.add_cookie(cookie_dict) def sele2resq(self, driver, s): self.selcj_cj(driver) self.torequestscj(s) def resq2sele(self, s, driver): self.reqcj_cj(s) self.toseleniumcj(driver) def selcj_cj(self, driver): cookie = driver.get_cookies() for s_cookie in cookie: self.cookiejar.set_cookie( Cookie( version=0, name=s_cookie['name'], value=s_cookie['value'], port='80', port_specified=False, domain=s_cookie['domain'], domain_specified=True, domain_initial_dot=False, path=s_cookie['path'], path_specified=True, secure=s_cookie['secure'], expires="2069592763", # s_cookie['expiry'] discard=False, comment=None, comment_url=None, rest=None, rfc2109=False)) def reqcj_cj(self, s): for s_cookie in s.cookies: self.cookiejar.set_cookie( Cookie( version=0, name=s_cookie.name, value=s_cookie.value, port='80', port_specified=False, domain=s_cookie.domain, domain_specified=True, domain_initial_dot=False, path="/", path_specified=True, secure=True, expires="2069592763", # s_cookie['expiry'] discard=False, comment=None, comment_url=None, rest=None, rfc2109=False))
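# Hedged usage sketch for the CookieWay helper above: load a Netscape-format
# cookie.txt, copy the cookies into a requests session, and write any cookies
# the session picked up back to disk. "cookie.txt" and the URL are placeholders.
import requests

cw = CookieWay()
cw.load("cookie.txt")

s = requests.Session()
cw.torequestscj(s)           # jar -> requests session
s.get("https://example.com")
cw.reqcj_cj(s)               # requests session -> jar
cw.save("cookie.txt")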
class HttpScan(DummyScan): def __init__(self, args): super(HttpScan, self).__init__(args) self.session = requesocks.session() adapters.DEFAULT_RETRIES = self.args.max_retries self.tor = None if self.args.tor: self.out.log("Enabling TOR") self.tor = Torify() self.session.proxies = {'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050'} if self.args.check_tor: # Check TOR self.out.log("Checking IP via TOR") rip, tip = self.tor.check_ip(verbose=True) if tip is None: self.out.log('TOR is not working properly!', logging.ERROR) exit(-1) if self.args.cookies is not None: if path.exists(self.args.cookies) and path.isfile(self.args.cookies): self.cookies = MozillaCookieJar(self.args.cookies) self.cookies.load() else: # self.out.log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR) self.cookies = Cookies.from_request(self.args.cookies) else: self.cookies = None self.ua = UserAgent() if self.args.user_agent is None else self.args.user_agent def filter(self, response): if response is None: return False # Filter responses and save responses that are matching ignore, allow rules if (self.args.allow is None and self.args.ignore is None) or \ (self.args.allow is not None and response.status_code in self.args.allow) or \ (self.args.ignore is not None and response.status_code not in self.args.ignore): # TODO: add regex search return True return False def scan_url(self, url): # TODO: add options r = None ex = None try: r = self.session.get(url) except Exception as e: ex = e finally: self.cb_response(url, r, ex) return r, ex def scan_host(self, host, urls): res = [] for u in urls: url = get_full_url(host, u) r, ex = self.scan_url(url) self.out.logger.write_response(url, r, ex) if self.filter(r): self.out.write(url, r, ex) res.append((url, r, ex)) return res def cb_scan_done(self, future): pass def cb_response(self, url, reponse, exception): pass
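# Hedged sketch: the response filter from HttpScan.filter() above, as a pure
# function over the status code. allow/ignore are optional collections of status
# codes; when neither is given, every response passes.
def status_allowed(status_code, allow=None, ignore=None):
    if allow is None and ignore is None:
        return True
    if allow is not None and status_code in allow:
        return True
    if ignore is not None and status_code not in ignore:
        return True
    return False

# status_allowed(200, allow=[200, 301])  -> True
# status_allowed(404, ignore=[404])      -> False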
class SimpleCrawler: USER_AGENT = 'SimpleCrawler/0.1' HEADERS = { 'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip', 'Connection': 'keep-alive' } CONTENT_TYPE_PAT = re.compile(r'([^\s;]+)(.*charset=([^\s;]+))?', re.I) def __init__(self, starturl, index_html='', maxlevel=1, cookie_file=None, acldb=None, urldb=None, default_charset=None, delay=0, timeout=300, debug=0): (proto, self.hostport, _x, _y, _z) = urlsplit(starturl) # assert proto == 'http' #Thread.__init__(self) self.debug = debug self.index_html = index_html if cookie_file: self.cookiejar = MozillaCookieJar(cookie_file) self.cookiejar.load() else: self.cookiejar = None self.robotstxt = RobotFileParser() self.robotstxt.set_url(urljoin(starturl, '/robots.txt')) # self.robotstxt.read() self.conn = None self.urldb = urldb self.acldb = acldb self.curlevel = 0 self.delay = delay self.timeout = timeout self.default_charset = default_charset if starturl.endswith('/'): starturl += self.index_html self.urls = [(starturl, maxlevel)] self.crawled = {} # 1:injected, 2:crawled return def accept_url(self, url): if url.endswith('/'): url += self.index_html if self.acldb and not self.acldb.allowed(url): return None return url def inject_url(self, url): if (not self.curlevel) or (not url) or (url in self.crawled): return False if not self.robotstxt.can_fetch(self.USER_AGENT, url): if self.debug: print >> stderr, 'DISALLOW: %r' % url return None if self.debug: print >> stderr, 'INJECT: %r' % url self.crawled[url] = 1 self.urls.append((url, self.curlevel - 1)) return True def get1(self, url, maxretry=5, maxredirect=5): if self.debug: print >> stderr, 'GET: %r' % url # loop for rtry in range(maxredirect): # forge urllib2.Request object. req = Request(url) # add cookie headers if necessary. if self.cookiejar: self.cookiejar.add_cookie_header(req) headers = req.unredirected_hdrs headers.update(self.HEADERS) else: headers = self.HEADERS # get response. for ctry in range(maxretry): try: if not self.conn: print >> stderr, 'Making connection: %r...' % ( self.hostport, ) self.conn = HTTPConnection(self.hostport) self.conn.request('GET', req.get_selector().replace(' ', ''), '', headers) # self.conn.sock.settimeout(self.timeout) resp = self.conn.getresponse() break except BadStatusLine, x: # connection closed unexpectedly print >> stderr, 'Connection closed unexpectedly.' # it restarts the connection... self.conn.close() self.conn = None except socket.error, x: # connection closed unexpectedly print >> stderr, 'Socket error:', x self.conn.close() self.conn = None else:
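# Hedged sketch: the manual cookie handling SimpleCrawler.get1() performs when it
# talks to httplib directly instead of going through a urllib2 opener. The jar
# writes a Cookie header onto the Request, and those headers are then handed to
# HTTPConnection.request() by hand. URL and cookie file are placeholders.
from cookielib import MozillaCookieJar
from urllib2 import Request

jar = MozillaCookieJar('cookies.txt')
try:
    jar.load()
except IOError:
    pass
req = Request('http://example.com/index.html')
jar.add_cookie_header(req)           # adds a Cookie header if the jar has matches
headers = dict(req.unredirected_hdrs)
headers.setdefault('User-Agent', 'SimpleCrawler/0.1')
# headers can now be passed to httplib.HTTPConnection.request('GET', path, '', headers)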
class BasisRetr: """The main entry points, once a BasisRetr object has been created, are: 1) GetDayData()-- download metrics, activity, sleep data for a single day from the basis website and save it, 2) GetActivityCsvForMonth()-- download activity summaries for an entire month, and 3) GetSleepCsvForMonth()--download sleep summaries for an entire month.""" LOGIN_URL = 'https://app.mybasis.com/login' UID_URL = 'https://app.mybasis.com/api/v1/user/me.json' METRICS_URL = 'https://app.mybasis.com/api/v1/chart/{userid}.json?interval=60&units=s&start_date={date}&start_offset=0&end_offset=0&summary=true&bodystates=true&heartrate=true&steps=true&calories=true&gsr=true&skin_temp=true&air_temp=true' ACTIVITIES_URL ='https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=run,walk,bike,sleep' SLEEP_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=sleep' SLEEP_EVENTS_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?type=sleep&event.type=toss_and_turn&expand=activities.stages,activities.events' DATE_FORMAT = "%04d-%02d-%02d" # save-to filename. date is prefix, format is suffix MO_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_activities_summary.csv" MO_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_sleep_summary.csv" # day sleep and activity filenames (for month summaries) DAY_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_activities.json" DAY_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_sleep.json" DAY_JSON_FNAME_TEMPLATE = "{date}_basis_{typ}.json" METRICS_FNAME_TEMPLATE = "{date}_basis_metrics.{ext}" SLEEP_FNAME_TEMPLATE= "{date}_basis_sleep.{format}" def __init__(self, loadconfig = None): # create config info self.cfg = Config(cfg_items = CFG_ITEMS) if loadconfig: self.cfg.Load() else: # if config file doesn't exist, save the defaults loaded above self.cfg.Save() #saves # url opener for website retrieves opener = urllib2.build_opener() self.cj = MozillaCookieJar(self.cfg.cookie_filename)#BasisRetr.COOKIE_FILENAME) self.session_cookie = None if os.path.exists(self.cfg.cookie_filename):#BasisRetr.COOKIE_FILENAME): self.cj.load() self.CheckSessionCookie() # set session cookie if it exists and hasn't expired # need to use build_opener to submit cookies and post form data self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj)) def GetDayData(self, yr, mo, day, typ, save_csv, override_cache = False, act_metr= True): """Main entry method for getting a day's worth of data, formatting, then saving it. typ is the type of data: metrics, activities, or sleep. Data is always saved in json format, but if save_csv is True, save to csv as well as json. override_cache ignores any already downloaded json. 
act_metr, if True, saves sleep and activity state along with metrics.""" date = BasisRetr.DATE_FORMAT % (yr, mo, day) ydate = self.GetYesterdayDateAsString(yr, mo, day) self.Status("Checking Login") self.CheckLogin() # ensure we're logged in self.Status("getting {} for {}".format(typ,date)) # figure out which data to get data = None # filename cfname = "{date}_basis_{typ}.csv".format(date=date, typ=typ) # if needed, download json data from website and save to file if typ == 'metrics': mjdata = self.RetrieveJsonOrCached(date, 'metrics', override_cache) ### MOVE THIS ERROR CHECKING INTO THE ABOVE METHOD if type(mjdata) == str or mjdata == None: # simple error checking self.Status('OnGetDayData: Metrics json conversion failed.') print mjdata[:500] return # also load up actities if typ == 'activities' or act_metr: ajdata = self.RetrieveJsonOrCached(date, 'activities', override_cache) if type(ajdata) == str or ajdata == None: # simple error checking self.Status('OnGetDayData: Activities json conversion failed.') print ajdata[:500] return if typ == 'sleep' or act_metr: sjdata = self.RetrieveJsonOrCached(date, 'sleep', override_cache) if type(sjdata) == str or sjdata == None: # simple error checking self.Status('OnGetDayData: Sleep json conversion failed.') print sjdata[:500] return if act_metr: # add yesterday's sleep data sjdata2= self.RetrieveJsonOrCached(ydate, 'sleep') # Next, turn the list of python objects into a csv file. # If asked to (via act_metr), collect sleep and activity type, then add them to each timestamp. cdata = None if save_csv: if typ == 'activities' or act_metr: act_list = self.JsonActivitiesToList(ajdata) cdata = self.CreateCSVFromList(self.cfg.csv_activity_colnames, act_list) if typ == 'sleep' or act_metr: sleep_evts_list = self.JsonSleepEventsToList(sjdata) cdata = self.CreateCSVFromList(self.cfg.csv_sleep_evt_colnames, sleep_evts_list) if act_metr: # prepend yesterday's sleep events as they may start before midnight. sleep_evts_list[:0] = self.JsonSleepEventsToList(sjdata2) if typ == 'metrics': metrics_list = self.JsonMetricsToList(mjdata) if act_metr: # add activities to metrics self.AddActivityTypeToMetrics(metrics_list, act_list, sleep_evts_list) header = self.cfg.csv_metrics_colnames + self.cfg.csv_activity_type_colnames else: header = self.cfg.csv_metrics_colnames cdata = self.CreateCSVFromList(header, metrics_list) # If we were able to make a csv file, save it. if cdata: fpath = os.path.join(os.path.abspath(self.cfg.savedir), cfname) self.SaveData(cdata, fpath) self.Status("Saved "+typ+" csv file at "+fpath) def CheckLogin(self): # the test below gives HTTP Error 401: Unauthorized if don't get cookie each time # I wonder if cookielib::FileCookieJar might do the right thing # i.e., save all cookies, not just session cookie. if not self.cfg.userid or not self.cfg.session_token: self.Login() def Login(self, login = None, passwd = None): """Log in to basis website to get session (access) token via cookie. Don't need to pass in loginid and password if want to use stored info.""" if login: self.cfg.loginid = login if passwd: self.cfg.passwd = passwd form_data = {'next': 'https://app.mybasis.com', 'submit': 'Login', 'username': self.cfg.loginid, 'password': self.cfg.passwd} enc_form_data = urllib.urlencode(form_data) f = self.opener.open(BasisRetr.LOGIN_URL, enc_form_data) content = f.read() #$ do we need to close f? 
m = re.search('error_string\s*=\s*"(.+)"', content, re.MULTILINE) if m: raise Exception(m.group(1)) self.CheckSessionCookie() # make sure we got the access token if not self.cfg.session_token: self.Status("Didn't find an access token in:"+["({}={}), ".format(c.name.c.value) for c in self.cj]) else: self.Status("Logged in, Got Access Token = "+self.cfg.session_token) def CheckSessionCookie(self): for cookie in self.cj: if cookie.name == 'access_token': self.cfg.session_token = cookie.value def GetUserID(self): """Retrieve the long hex string that uniquely identifies a user from the Basis website.""" if not self.cfg.session_token: raise Exception('no token', 'no access token found-may be internet connectivity or bad login info.') self.opener.addheaders = [('X-Basis-Authorization', "OAuth "+self.cfg.session_token)] f = self.opener.open(BasisRetr.UID_URL) content = f.read() jresult = json.loads(content) self.cfg.userid= None if 'id' in jresult: self.cfg.userid = jresult['id'] def GetYesterdayDateAsString(self, yr, mo, day): """Need yesterday's date to get sleep events for a given calendar day. This is because sleep events, as downloaded from the Basis Website, start from the prior evening, when you actually went to sleep.""" tday, tmo, tyr = day-1, mo, yr if tday <1: # previous month tmo -= 1 if tmo < 1: # previous year tyr -= 1 tmo = 12 # once we adjusted the month, find the last day of that month tday = calendar.monthrange(tyr, tmo)[1] tdate = BasisRetr.DATE_FORMAT % (tyr, tmo, tday) return tdate def RetrieveMetricsJsonForDay(self, date): # Need userid in order to get metrics if not self.cfg.userid: self.Status("BasisRetr::GetMetrics: No userid available; getting from website.") self.GetUserID() self.Status("Retrieved userid from website.") # Form the URL url = BasisRetr.METRICS_URL.format(date=date,userid=self.cfg.userid) return self.GetJsonData(url) def RetrieveActivitiesJsonForDay(self, date): url = BasisRetr.ACTIVITIES_URL.format(date = date) return self.GetJsonData(url) def RetrieveSleepSummaryJsonForDay(self, date): url = BasisRetr.SLEEP_URL.format(date=date) return self.GetJsonData(url) def RetrieveSleepEventsJsonForDay(self,date): url = BasisRetr.SLEEP_EVENTS_URL.format(date=date) return self.GetJsonData(url) def GetJsonStorageDir(self): """Allow json storage dir to be absolute or relative (to csv dir) path.""" if os.path.isabs(self.cfg.jsondir): return self.cfg.jsondir else: return os.path.join(os.path.abspath(self.cfg.savedir), self.cfg.jsondir) def RetrieveJsonOrCached(self, date, typ, user_override_cache = None): """If json file exists in json dir, then just read that. Otherwise, download from basis website. If override_cache is set, always download from website.""" fname = BasisRetr.DAY_JSON_FNAME_TEMPLATE.format(date=date, typ=typ) fpath = os.path.join(self.GetJsonStorageDir(), fname) # don't use cache if the saved data is very recent-- what's saved may have been before the end of the day. 
if os.path.isfile(fpath): # these calculations are in seconds since epoch days_prev = 3600*24*self.cfg.nocache_days last_mod_time = os.path.getmtime(fpath) target_time = time.mktime(datetime.datetime.strptime(date, "%Y-%m-%d").timetuple()) force_override_cache = last_mod_time - target_time < days_prev # if file exists and we've said via UI, "don't override the cache", then read json from cache if os.path.isfile(fpath) and not user_override_cache and not force_override_cache: with open(fpath, "r") as f: data = f.read() jdata = json.loads(data) else: # retrieve data from website if typ == 'metrics': jdata = self.RetrieveMetricsJsonForDay(date) elif typ == 'activities': jdata = self.RetrieveActivitiesJsonForDay(date) elif typ == 'sleep': jdata = self.RetrieveSleepEventsJsonForDay(date) elif typ == 'sleep_summary': jdata = self.RetrieveSleepSummaryJsonForDay(date) #json_path = os.path.join(self.GetJsonStorageDir(), fname) # make sure directory exists if not os.path.isdir(self.GetJsonStorageDir()): os.makedirs(self.GetJsonStorageDir()) self.SaveData(json.dumps(jdata), fpath) return jdata def GetJsonData(self, url): if DEBUG: print url if True: try: f = self.opener.open(url) jresult= json.loads(f.read()) except urllib2.HTTPError as e: reason = BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code] jresult = {'code': e.code, 'error':reason, 'url':url} # callback (if available) to UI manager to ensure it doesn't freeze if hasattr(self, 'FreezePrevention'): self.FreezePrevention() if 'code' in jresult and jresult['code'] == 401: # unauthorized. try logging in self.Status("Auth error, Logging in for new session token.") self.Login() try: # try again f = self.opener.open(url) jresult= json.loads(f.read()) except urllib2.HTTPError as e: reason = BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code] jresult = {'code': e.code, 'error':reason, 'url':url} return jresult def SaveData(self, data, fpath): try: fh = file(os.path.abspath(fpath), "w") fh.write(data) except IOError, v: self.Status("problem saving file to:"+fpath+"\n--Error: "+`v`) try: # if problem is on open, then fh doesn't exist. fh.close() except: pass
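# Hedged sketch: the cache check from RetrieveJsonOrCached() above. A cached json
# file is treated as stale ("force override") when it was written less than
# nocache_days after the day it describes, because that day may not have been
# over yet when the file was saved. fpath, date_str and nocache_days are
# placeholders supplied by the caller.
import os, time, datetime

def cache_is_stale(fpath, date_str, nocache_days=3):
    if not os.path.isfile(fpath):
        return True  # nothing cached yet
    seconds_prev = 3600 * 24 * nocache_days
    last_mod_time = os.path.getmtime(fpath)
    target_time = time.mktime(
        datetime.datetime.strptime(date_str, "%Y-%m-%d").timetuple())
    return last_mod_time - target_time < seconds_prev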
class HttpScanner(object): def __init__(self, args): """ Initialise HTTP scanner :param args: :return: """ self.args = args self.output = HttpScannerOutput(args) self._init_scan_options() # Reading files self.output.write_log("Reading files and deduplicating.", logging.INFO) self.hosts = self._file_to_list(args.hosts) self.urls = self._file_to_list(args.urls) # self._calc_urls() out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count) if self.args.ports is not None: out += ' %i ports' % len(self.args.ports) self.output.print_and_log(out) if self.args.ports is not None and not self.args.syn: new_hosts = [] for host in self.hosts: for port in self.args.ports: # print(host, port) new_hosts.append(helper.generate_url(host, port)) self.hosts = new_hosts # self._calc_urls() self.output.print_and_log('%i full urls to scan' % self.full_urls_count) # Queue and workers self.hosts_queue = JoinableQueue() self.workers = [] def _file_to_list(self, filename, dedup=True): """ Get list from file :param filename: file to read :return: list of lines """ if not path.exists(filename) or not path.isfile(filename): self.output.print_and_log('File %s not found!' % filename, logging.ERROR) exit(-1) # Preparing lines list lines = filter(lambda line: line is not None and len(line) > 0, open(filename).read().split('\n')) if len(lines) == 0: self.output.print_and_log('File %s is empty!' % filename, logging.ERROR) exit(-1) return helper.deduplicate(lines) if dedup else lines def _init_scan_options(self): # Session self.session = session() self.session.timeout = self.args.timeout self.session.verify = False # TODO: debug and check # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries)) # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries)) # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request # Max retries adapters.DEFAULT_RETRIES = self.args.max_retries # TOR if self.args.tor: self.output.write_log("TOR usage detected. Making some checks.") self.session.proxies = { 'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050' } url = 'http://ifconfig.me/ip' real_ip, tor_ip = None, None # Ger real IP address try: real_ip = get(url).text.strip() except Exception as exception: self.output.print_and_log("Couldn't get real IP address. Check yout internet connection.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Get TOR IP address try: tor_ip = self.session.get(url).text.strip() except Exception as exception: self.output.print_and_log("TOR socks proxy doesn't seem to be working.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Show IP addresses self.output.print_and_log('Real IP: %s TOR IP: %s' % (real_ip, tor_ip)) if real_ip == tor_ip: self.output.print_and_log("TOR doesn't work! 
Stop to be secure.", logging.ERROR) exit(-1) # Proxy if self.args.proxy is not None: self.session.proxies = {"https": self.args.proxy, "http": self.args.proxy} # Auth if self.args.auth is not None: items = self.args.auth.split(':') self.session.auth = (items[0], items[1]) # Cookies self.cookies = {} if self.args.cookies is not None: self.cookies = Cookies.from_request(self.args.cookies) # Cookies from file if self.args.load_cookies is not None: if not path.exists(self.args.load_cookies) or not path.isfile(self.args.load_cookies): self.output.print_and_log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR) exit(-1) self.cookies = MozillaCookieJar(self.args.load_cookies) self.cookies.load() self.session.cookies = self.cookies # User-Agent self.ua = UserAgent() if self.args.random_agent else None def worker(self, worker_id): self.output.write_log('Worker %i started.' % worker_id) while not self.hosts_queue.empty(): host = self.hosts_queue.get() try: self.scan_host(worker_id, host) finally: self.output.write_log('Worker %i finished.' % worker_id) self.hosts_queue.task_done() def _head_available(self, host): """ Determine if HEAD requests is allowed :param host: :return: """ # Trying to use OPTIONS request try: response = self.session.options(host, headers=self._fill_headers()) o = response.headers['allow'] if 'allow' in response.headers else None if o is not None and o.find('HEAD') != -1: return True except: # TODO: fix pass try: return False if self.session.head(host, headers=self._fill_headers()).status_code == 405 else True except: # TODO: fix return False def scan_host(self, worker_id, host): # check if resolvable ip = helper.url_to_ip(host) if ip is None: self.output.write_log('Could not resolve %s Skipping...' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) return # Check for HEAD host_url = helper.host_to_url(host) head_available = False if self.args.head: head_available = self._head_available(host) if head_available: self.output.write_log('HEAD is supported for %s' % host) errors_count, urls_scanned = 0, 0 for url in self.urls: full_url = urljoin(host_url, url) r = self.scan_url(full_url, head_available) urls_scanned += 1 self.output.urls_scanned += 1 # Output r['worker'] = worker_id self.output.write(**r) if r['exception'] is not None: errors_count += 1 # Skip host on errors if self.args.skip is not None and errors_count == self.args.skip: self.output.write_log('Errors limit reached on %s Skipping other urls.' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) - urls_scanned break # cookies bugfix? 
self.session.cookies.clear() def _fill_headers(self): # Fill UserAgent in headers headers = {} if self.args.user_agent is not None: headers['User-agent'] = self.args.user_agent elif self.args.random_agent: headers['User-agent'] = self.ua.random # Fill Referer in headers if self.args.referer is not None: headers['Referer'] = self.args.referer return headers def _parse_response(self, url, response, exception): res = {'url': url, 'response': response, 'exception': exception} if response is None or exception is not None: res.update({ 'status': -1, 'length': -1, }) return res try: length = int(response.headers['content-length']) if 'content-length' in response.headers else len( response.text) except Exception as exception: self.output.write_log( "Exception while getting content length for URL: %s Exception: %s" % (url, str(exception)), logging.ERROR) length = 0 res.update({ 'status': response.status_code, 'length': length, }) return res def scan_url(self, url, use_head=False): self.output.write_log('Scanning %s' % url, logging.DEBUG) # Query URL and handle exceptions response, exception = None, None method = 'HEAD' if use_head else 'GET' try: # TODO: add support for user:password in URL response = self.session.request(method, url, headers=self._fill_headers(), allow_redirects=self.args.allow_redirects) except ConnectionError as ex: self.output.write_log('Connection error while quering %s' % url, logging.ERROR) exception = ex except HTTPError as ex: self.output.write_log('HTTP error while quering %s' % url, logging.ERROR) exception = ex except Timeout as ex: self.output.write_log('Timeout while quering %s' % url, logging.ERROR) exception = ex except TooManyRedirects as ex: self.output.write_log('Too many redirects while quering %s' % url, logging.ERROR) exception = ex except Exception as ex: self.output.write_log('Unknown exception while quering %s' % url, logging.ERROR) exception = ex # print('cookies: %s' % self.cookies) print('session.cookies: %s' % self.session.cookies) # self.session.cookies = self.cookies return self._parse_response(url, response, exception) def signal_handler(self): """ Signal hdndler :return: """ # TODO: add saving status via pickle self.output.print_and_log('Signal caught. Stopping...', logging.WARNING) self.stop() exit(signal.SIGINT) def _calc_urls(self): # Calculations self.urls_count = len(self.urls) self.hosts_count = len(self.hosts) self.full_urls_count = len(self.urls) * len(self.hosts) self.output.args.urls_count = self.full_urls_count def start(self): """ Start mulithreaded scan :return: """ # Set signal handler gevent.signal(signal.SIGTERM, self.signal_handler) gevent.signal(signal.SIGINT, self.signal_handler) gevent.signal(signal.SIGQUIT, self.signal_handler) # ICMP scan if self.args.icmp: if geteuid() != 0: self.output.print_and_log('To use ICMP scan option you must run as root. Skipping ICMP scan', logging.WARNING) else: self.output.print_and_log('Starting ICMP scan.') self.hosts = helper.icmp_scan(self.hosts, self.args.timeout) self._calc_urls() self.output.print_and_log('After ICMP scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # SYN scan if self.args.syn: if self.args.tor or self.args.proxy is not None: self.output.print_and_log('SYN scan via tor or proxy is impossible!', logging.WARNING) self.output.print_and_log('Stopping to prevent deanonymization!', logging.WARNING) exit(-1) if geteuid() != 0: self.output.print_and_log('To use SYN scan option you must run as root. 
Skipping SYN scan', logging.WARNING) else: self.output.print_and_log('Starting SYN scan.') self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout) self._calc_urls() self.output.print_and_log('After SYN scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # Check threads count vs hosts count if self.args.threads > self.hosts_count: self.output.write_log('Too many threads! Fixing threads count to %i' % self.hosts_count, logging.WARNING) threads_count = self.hosts_count else: threads_count = self.args.threads # Output urls count self.output.args.urls_count = self.full_urls_count # Start workers self.workers = [spawn(self.worker, i) for i in range(threads_count)] # Fill and join queue [self.hosts_queue.put(host) for host in self.hosts] self.hosts_queue.join() def stop(self): """ Stop scan :return: """ # TODO: stop correctly gevent.killall(self.workers)
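# Hedged sketch: the HEAD-support probe from _head_available() above, written
# against a plain requests session. The OPTIONS Allow header is consulted first;
# if that is inconclusive, a HEAD request is sent and a 405 means "not allowed".
# host_url is a placeholder supplied by the caller.
import requests

def head_available(session, host_url):
    try:
        allow = session.options(host_url).headers.get('allow', '')
        if 'HEAD' in allow:
            return True
    except requests.RequestException:
        pass
    try:
        return session.head(host_url).status_code != 405
    except requests.RequestException:
        return False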
Skipping SYN scan', logging.WARNING) else: self.output.print_and_log('Starting SYN scan.') self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout) self._calc_urls() self.output.print_and_log( 'After SYN scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # Check threds count vs hosts count if self.args.threads > self.hosts_count: self.output.write_log( 'Too many threads! Fixing threads count to %i' % self.hosts_count, logging.WARNING) threads_count = self.hosts_count else: threads_count = self.args.threads # Output urls count self.output.args.urls_count = self.full_urls_count # Start workers self.workers = [spawn(self.worker, i) for i in range(threads_count)] # Fill and join queue [self.hosts_queue.put(host) for host in self.hosts] self.hosts_queue.join() def stop(self): """ Stop scan :return: """ # TODO: stop correctly gevent.killall(self.workers)
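# A minimal, self-contained sketch (Python 3) of the cookie handling used by the
# scanner above: load a Netscape/Mozilla-format cookies.txt into a MozillaCookieJar
# and hand it to a requests session, then save whatever the server sets back.
# The file name and URL are illustrative, not taken from the scanner's options.
from http.cookiejar import MozillaCookieJar
import requests

jar = MozillaCookieJar('cookies.txt')
jar.load(ignore_discard=True, ignore_expires=True)   # raises if the file is missing or malformed

session = requests.Session()
session.cookies = jar                                 # requests accepts any CookieJar instance
response = session.get('http://example.com/')
jar.save(ignore_discard=True)                         # persist cookies set during the request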
class SimpleCrawler: USER_AGENT = 'SimpleCrawler/0.1' HEADERS = { 'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip', 'Connection': 'keep-alive' } CONTENT_TYPE_PAT = re.compile(r'([^\s;]+)(.*charset=([^\s;]+))?', re.I) def __init__(self, starturl, index_html='', maxlevel=1, cookie_file=None, acldb=None, urldb=None, default_charset=None, delay=0, timeout=300, debug=0): (proto, self.hostport, _x, _y, _z) = urlsplit(starturl) assert proto == 'http' #Thread.__init__(self) self.debug = debug self.index_html = index_html if cookie_file: self.cookiejar = MozillaCookieJar(cookie_file) self.cookiejar.load() else: self.cookiejar = None self.robotstxt = RobotFileParser() self.robotstxt.set_url(urljoin(starturl, '/robots.txt')) self.robotstxt.read() self.conn = None self.urldb = urldb self.acldb = acldb self.curlevel = 0 self.delay = delay self.timeout = timeout self.default_charset = default_charset if starturl.endswith('/'): starturl += self.index_html self.urls = [(starturl, maxlevel)] self.crawled = {} # 1:injected, 2:crawled return def accept_url(self, url): if url.endswith('/'): url += self.index_html if self.acldb and not self.acldb.allowed(url): return None return url def inject_url(self, url): if (not self.curlevel) or (not url) or (url in self.crawled): return False if not self.robotstxt.can_fetch(self.USER_AGENT, url): if self.debug: print >>stderr, 'DISALLOW: %r' % url return None if self.debug: print >>stderr, 'INJECT: %r' % url self.crawled[url] = 1 self.urls.append((url, self.curlevel-1)) return True def get1(self, url, maxretry=3, maxredirect=3): if self.debug: print >>stderr, 'GET: %r' % url # loop for rtry in range(maxredirect): # forge urllib2.Request object. req = Request(url) # add cookie headers if necessary. if self.cookiejar: self.cookiejar.add_cookie_header(req) headers = req.unredirected_hdrs headers.update(self.HEADERS) else: headers = self.HEADERS # get response. for ctry in range(maxretry): try: if not self.conn: print >>stderr, 'Making connection: %r...' % (self.hostport,) self.conn = HTTPConnection(self.hostport) self.conn.request('GET', req.get_selector().replace(' ',''), '', headers) self.conn.sock.settimeout(self.timeout) resp = self.conn.getresponse() break except BadStatusLine, x: # connection closed unexpectedly print >>stderr, 'Connection closed unexpectedly.' # it restarts the connection... self.conn.close() self.conn = None except socket.error, x: # connection closed unexpectedly print >>stderr, 'Socket error:', x self.conn.close() self.conn = None else:
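# The crawler above attaches cookies to a hand-built Request with add_cookie_header()
# and then issues the request over a raw HTTP connection. A condensed Python 3 sketch
# of that pattern (host, path and cookie file are illustrative):
from http.cookiejar import MozillaCookieJar
from urllib.request import Request
from http.client import HTTPConnection

jar = MozillaCookieJar('cookies.txt')
jar.load()

req = Request('http://example.com/index.html')
jar.add_cookie_header(req)                 # fills the request's unredirected Cookie header
headers = dict(req.unredirected_hdrs)
headers['User-Agent'] = 'SimpleCrawler/0.1'

conn = HTTPConnection('example.com')
conn.request('GET', '/index.html', None, headers)
print(conn.getresponse().status)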
class querier(object): proxy = None def _get_page(self, pagerequest): """Return the data for a page on scholar.google.com""" # Note that we include a sleep to avoid overloading the scholar server time.sleep(max((5, random.uniform(0, 5)))) # resp = _SESSION.get(pagerequest, headers=_HEADERS, cookies=_COOKIES) req = Request(url=pagerequest, headers=_HEADERS) if self.proxy: r = self.proxy.get(pagerequest, options={}) while not r['body']: # Error on request through the proxy print("Error while communicating, trying again ...") r = self.proxy.get(pagerequest, options={}) return r['body'] hdl = urlopen(req) return hdl.read() # if html.status_code == 200: # return resp.text # else: # raise Exception('Error: {0} {1}'.format(html.status_code, html.reason)) def _get_soup(self, pagerequest): """Return the BeautifulSoup for a page on scholar.google.com""" html = self._get_page(pagerequest) return BeautifulSoup(html, 'html.parser') def _search_scholar_soup(self, soup): """Generator that returns Publication objects from the search page""" while True: for row in soup.find_all('div', 'gs_r'): # Small fix: leave out the first entry, which is the article being searched if row.find('div', class_='gs_ri') is None: continue yield Publication(row, self, 'scholar') if soup.find(class_='gs_ico gs_ico_nav_next'): url = soup.find(class_='gs_ico gs_ico_nav_next').parent['href'] soup = self._get_soup(_HOST + url) else: break def _search_citation_soup(self, soup): """Generator that returns Author objects from the author search page""" while True: for row in soup.find_all('div', 'gsc_1usr'): yield Author(row) next_button = soup.find( class_= 'gs_btnPR gs_in_ib gs_btn_half gs_btn_lsb gs_btn_srt gsc_pgn_pnx' ) if next_button and 'disabled' not in next_button.attrs: url = next_button['onclick'][17:-1] url = codecs.getdecoder("unicode_escape")(url)[0] soup = self._get_soup(_HOST + url) else: break def search_pubs_query(self, query, years=None): """Search by scholar query and return a generator of Publication objects""" url = _PUBSEARCH.format(requests.utils.quote(query.encode('utf8'))) if years is not None: if isinstance(years, list): url += '&as_ylo=' + str(min(years)) + '&as_yhi=' + str( max(years)) if isinstance(years, int): url += '&as_ylo=' + str(years) + '&as_yhi=' + str(years) soup = self._get_soup(_HOST + url) return self._search_scholar_soup(soup) def search_author(self, name): """Search by author name and return a generator of Author objects""" url = _AUTHSEARCH.format(requests.utils.quote(name)) soup = self._get_soup(_HOST + url) return self._search_citation_soup(soup) def search_keyword(self, keyword): """Search by keyword and return a generator of Author objects""" url = _KEYWORDSEARCH.format(requests.utils.quote(keyword)) soup = self._get_soup(_HOST + url) return self._search_citation_soup(soup) def search_pubs_custom_url(self, url): """Search by custom URL and return a generator of Publication objects URL should be of the form '/scholar?q=...'""" soup = self._get_soup(_HOST + url) return self._search_scholar_soup(soup) def search_author_custom_url(self, url): """Search by custom URL and return a generator of Publication objects URL should be of the form '/citation?q=...'""" soup = self._get_soup(_HOST + url) return self._search_citation_soup(soup) def save_cookies(self): """ This stores the latest cookies we're using to disk, for reuse in a later session. 
""" try: self.cjar.save(self.cFile, ignore_discard=True) return True except Exception as msg: return False def set_proxy(self, token): from proxycrawl import ProxyCrawlAPI self.proxy = ProxyCrawlAPI({'token': token}) def reset_proxy(self): self.proxy = None def __init__(self, cookie_file=''): self.cjar = MozillaCookieJar() self.cFile = cookie_file # If we have a cookie file, load it: if self.cFile and exists(self.cFile): try: self.cjar.load(self.cFile, ignore_discard=True) except Exception as msg: self.cjar = MozillaCookieJar() # Just to be safe self.opener = build_opener(HTTPCookieProcessor(self.cjar)) def __del__(self): self.save_cookies()
def get_url(url, config, additional_headers=None, additional_query_string=None, post_data=None, fail_silent=False, no_cache=False, return_json_errors=[], return_final_url=False, cookie_file=None): response_content = '' request_hash = sha512( (url + dumps(additional_headers) + dumps(additional_query_string) + dumps(post_data)).encode('utf-8')).hexdigest() final_url = url if xbmc_helper().get_bool_setting('debug_requests') is True: xbmc_helper().log_debug( 'get_url - url: {} headers {} query {} post {} no_cache {} silent {} request_hash {} return_json_errors {}, cookie_file', url, additional_headers, additional_query_string, post_data, no_cache, fail_silent, request_hash, return_json_errors, cookie_file) if no_cache is True: etags_data = None else: etags_data = get_etags_data(request_hash) try: headers = { 'Accept-Encoding': 'gzip, deflate', 'User-Agent': config['USER_AGENT'], 'Accept': '*/*', } if additional_headers is not None: headers.update(additional_headers) if config.get('http_headers', None) is not None: headers.update(config.get('http_headers', [])) if etags_data is not None: headers.update({'If-None-Match': etags_data['etag']}) if additional_query_string is not None: _url = compat._format('{}{}{}', url, '?' if url.find('?') == -1 else '&', urlencode(additional_query_string)) url = _url if isinstance(post_data, dict): post_data = urlencode(post_data) cookie_processor = None cookie_jar = None if cookie_file is not None: cookie_jar = MozillaCookieJar(cookie_file) try: cookie_jar.load() except LoadError: xbmc_helper().log_debug('Failed to load from cookiefile {} with error {} - new session?', cookie_file, LoadError.strerror) pass cookie_processor = HTTPCookieProcessor(cookie_jar) if xbmc_helper().get_bool_setting('use_https_proxy') is True and xbmc_helper().get_text_setting( 'https_proxy_host') != '' and xbmc_helper().get_int_setting('https_proxy_port') != 0: proxy_uri = compat._format('{}:{}', xbmc_helper().get_text_setting('https_proxy_host'), xbmc_helper().get_text_setting('https_proxy_port')) xbmc_helper().log_debug('Using proxy uri {}', proxy_uri) prxy_handler = ProxyHandler({ 'http': proxy_uri, 'https': proxy_uri, }) if cookie_processor is None: install_opener(build_opener(prxy_handler)) else: install_opener(build_opener(prxy_handler, cookie_processor)) elif cookie_processor is not None: install_opener(build_opener(cookie_processor)) if post_data is not None: request = Request(url, data=post_data.encode('utf-8'), headers=headers) else: request = Request(url, headers=headers) response = urlopen(request, timeout=40) if response.info().get('Content-Encoding') == 'gzip': response_content = compat._decode(GzipFile(fileobj=BytesIO(response.read())).read()) else: response_content = compat._decode(response.read()) if cookie_jar is not None: cookie_jar.save() final_url = response.geturl() _etag = response.info().get('etag', None) if no_cache is False and _etag is not None: set_etags_data(request_hash, _etag, response_content) except HTTPError as http_error: if http_error.code == 304 and etags_data.get('data', None) is not None: response_content = etags_data.get('data') else: try: if http_error.info().get('Content-Encoding') == 'gzip': error_body = compat._decode(GzipFile(fileobj=BytesIO(http_error.read())).read()) else: error_body = compat._decode(http_error.read()) xbmc_helper().log_debug('HTTP ERROR: {}', error_body) json_errors = loads(error_body) xbmc_helper().log_debug('JSON ERRORS: {}', json_errors) has_decoded_error = False if isinstance(json_errors, dict) and 'errors' not in 
json_errors.keys() and 'code' in json_errors.keys(): json_errors = {'errors': [json_errors]} elif isinstance(json_errors, list) and len(json_errors) == 1 and isinstance(json_errors[0], dict): json_errors = {'errors': json_errors} err_str = str(http_error.code) return_errors = [] if isinstance(json_errors, dict): for error in json_errors.get('errors', []): if 'msg' in error.keys(): err_str = compat._format('{}|{}', err_str, error.get('msg')) has_decoded_error = True if 'code' in error.keys() and error['code'] in return_json_errors: return_errors.append(error['code']) has_decoded_error = True xbmc_helper().log_debug('return_json_errors {}', return_errors) if len(return_errors) > 0: response_content = dumps({'json_errors': return_errors}) elif has_decoded_error is True: xbmc_helper().notification( 'Error', err_str, ) exit(0) except Exception: raise http_error except Exception as e: xbmc_helper().log_error('Failed to load url: {} headers {} post_data {} - Exception: {}', url, headers, post_data, e) if fail_silent is True: pass else: xbmc_helper().notification(compat._format(xbmc_helper().translation('ERROR'), 'URL Access'), compat._format(xbmc_helper().translation('MSG_NO_ACCESS_TO_URL'), str(url))) exit(0) if return_final_url: return final_url, response_content return response_content
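# get_url() above assembles its opener from an optional ProxyHandler and an
# optional HTTPCookieProcessor, tolerating a cookie file that cannot be loaded yet.
# A condensed sketch of that assembly (proxy address and file name are placeholders):
from http.cookiejar import MozillaCookieJar, LoadError
from urllib.request import HTTPCookieProcessor, ProxyHandler, build_opener, install_opener

handlers = []

cookie_jar = MozillaCookieJar('addon_cookies.txt')
try:
    cookie_jar.load()
except (LoadError, OSError):
    pass                                   # new session: file missing or not yet valid
handlers.append(HTTPCookieProcessor(cookie_jar))

proxy_uri = None                           # e.g. '127.0.0.1:8118' when a proxy is configured
if proxy_uri:
    handlers.append(ProxyHandler({'http': proxy_uri, 'https': proxy_uri}))

install_opener(build_opener(*handlers))    # later urlopen() calls use these handlers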
class Session(requests.Session): """ Session for making API requests and interacting with the filesystem """ def __init__(self): super(Session, self).__init__() self.trust_env = False cookie_file = os.path.expanduser('~/.deis/cookies.txt') cookie_dir = os.path.dirname(cookie_file) self.cookies = MozillaCookieJar(cookie_file) # Create the $HOME/.deis dir if it doesn't exist if not os.path.isdir(cookie_dir): os.mkdir(cookie_dir, 0700) # Load existing cookies if the cookies.txt exists if os.path.isfile(cookie_file): self.cookies.load() self.cookies.clear_expired_cookies() def git_root(self): """ Return the absolute path from the git repository root If no git repository exists, raise an EnvironmentError """ try: git_root = subprocess.check_output( ['git', 'rev-parse', '--show-toplevel'], stderr=subprocess.PIPE).strip('\n') except subprocess.CalledProcessError: raise EnvironmentError('Current directory is not a git repository') return git_root def get_formation(self): """ Return the formation name for the current directory The formation is determined by parsing `git remote -v` output. If no formation is found, raise an EnvironmentError. """ git_root = self.git_root() # try to match a deis remote remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root) m = re.match(r'^deis\W+(?P<url>\S+)\W+\(', remotes, re.MULTILINE) if not m: raise EnvironmentError( 'Could not find deis remote in `git remote -v`') url = m.groupdict()['url'] m = re.match('\S+:(?P<formation>[a-z0-9-]+)(.git)?', url) if not m: raise EnvironmentError("Could not parse: {url}".format(**locals())) return m.groupdict()['formation'] formation = property(get_formation) def request(self, *args, **kwargs): """ Issue an HTTP request with proper cookie handling including `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>` """ for cookie in self.cookies: if cookie.name == 'csrftoken': if 'headers' in kwargs: kwargs['headers']['X-CSRFToken'] = cookie.value else: kwargs['headers'] = {'X-CSRFToken': cookie.value} break response = super(Session, self).request(*args, **kwargs) self.cookies.save() return response
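# The request() override above copies the Django csrftoken cookie into an
# X-CSRFToken header before delegating to requests. The same idea as a plain
# helper function (URL is illustrative):
import requests

def request_with_csrf(session, method, url, **kwargs):
    for cookie in session.cookies:
        if cookie.name == 'csrftoken':
            kwargs.setdefault('headers', {})['X-CSRFToken'] = cookie.value
            break
    return session.request(method, url, **kwargs)

# usage:
#   s = requests.Session()
#   request_with_csrf(s, 'POST', 'https://deis.example.com/api/apps/', json={})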
class bulk_downloader: def __init__(self, id, username, password, table_name): # List of files to download if id[:3] == 'S1A': self.files = [ 'https://datapool.asf.alaska.edu/GRD_HD/SA/{}.zip'.format(id) ] elif id[:3] == 'S1B': self.files = [ 'https://datapool.asf.alaska.edu/GRD_HD/SB/{}.zip'.format(id) ] else: print('no identified sensor: {}'.format(id)) logging.error('sensor not identified: {}'.format(id)) return self.username = username self.password = password self.table_name = table_name self.save_to = os.getenv('IMAGES_PATH') # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join( os.path.dirname(os.path.abspath('__file__')), '.bulk_download_cookiejar.txt') self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request' \ } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( 'WARNING: Cannot write to current path! Check permissions for {0}' .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Make sure cookie_jar is good to go! self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): # remove the cookie_jar_path file if its older than a day date_created = cookie_creation_date() if date_created: dx = datetime.now() - date_created hour = dx.total_seconds() / (3600) if hour > 10: print('cookie greater than 10 hours so removing it') os.remove(self.cookie_jar_path) if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(' > Re-using previous cookie jar.') return True else: print(' > Could not validate old cookie Jar') # We don't have a valid cookie, prompt user or creds print('No existing URS cookie found, creating one') print('(Credentials will not be stored, saved or logged anywhere)') # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(' > Cookiejar is bunk: {0}'.format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(' > attempting to download {0}'.format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print('\nIMPORTANT: ') print( 'Your user appears to lack permissions to download data from the ASF Datapool.' ) print( '\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov' ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') # Funky Test env: if ('vertex-retired.daac.asf.alaska.edu' in redir_url and 'test' in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print('Redirect ({0}) occurred, invalid cookie value!'.format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials new_username = self.username new_password = self.password # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \ self.asf_urs4['redir'] + '&response_type=code&state=' try: # python2 user_pass = base64.b64encode( bytes(new_username + ':' + new_password)) except TypeError: # python3 user_pass = base64.b64encode( bytes(new_username + ':' + new_password, 'utf-8')) user_pass = user_pass.decode('utf-8') # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! try: response = opener.open(request) except HTTPError as e: if e.code == 401: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): # COOKIE SUCCESS! print('cookie saved') self.cookie_jar.save(self.cookie_jar_path) save_cookie_creation_date() return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! 
return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(os.path.join(self.save_to, download_file)): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize( os.path.join(self.save_to, download_file)) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None # partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(os.path.join(self.save_to, download_file)) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! 
print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() # handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, os.path.join(self.save_to, download_file)) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(os.path.join(self.save_to, download_file)) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. 
\n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self, rid): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( "--------------------------------------------------------------------------------\n\n" ) # since we are downloading one file at a time! if len(self.success) > 0: try: conn, cur = connect_to_db() cur.execute( "UPDATE {} SET downloaded=TRUE WHERE rid={}".format( self.table_name, rid)) conn.commit() close_connection(conn, cur) except Exception as e: print('error inserting into db because {}'.format(e)) logging.error(e) # ideally should not end up here but anyway if len(self.skipped) > 0: try: conn, cur = connect_to_db() cur.execute( "UPDATE {} SET downloaded=FALSE WHERE rid={}".format( self.table_name, rid)) conn.commit() close_connection(conn, cur) except Exception as e: print('error inserting into db because {}'.format(e)) logging.error(e)
def load_cookies_from_mozilla(self, filename):
    ns_cookiejar = MozillaCookieJar()
    ns_cookiejar.load(filename, ignore_discard=True, ignore_expires=True)
    return ns_cookiejar
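# Possible usage of the helper above (Python 3): install the returned jar into a
# urllib opener so the saved cookies accompany every request. File name and URL
# are illustrative.
from http.cookiejar import MozillaCookieJar
from urllib.request import HTTPCookieProcessor, build_opener

jar = MozillaCookieJar()
jar.load('cookies.txt', ignore_discard=True, ignore_expires=True)
opener = build_opener(HTTPCookieProcessor(jar))
with opener.open('http://example.com/') as resp:
    body = resp.read()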
class LSession(): def __init__(self,cookiefile = None, proxy = None, timeout = 10, retime = 30,sleept = 3): self.timeout=timeout self.retime=retime self.sleept=sleept #proxy '1.234.77.96:80' if cookiefile == None: self.cookiejar = CookieJar() else: self.cookiejar = MozillaCookieJar(filename=cookiefile) #self.cookiejar =cookielib.LWPCookieJar(filename=cookiefile) if not os.path.isfile(cookiefile): open(cookiefile, 'w').write(MozillaCookieJar.header) #open(cookiefile, 'w').write('#abc\n') pass self.cookiejar.load(filename=cookiefile,ignore_discard=True) #print "ck:",self.cookiejar self.cookie_processor = HTTPCookieProcessor(self.cookiejar) self.opener=build_opener(urllib2.HTTPRedirectHandler(),self.cookie_processor) if proxy : self.opener.add_handler(ProxyHandler({"http" : proxy})) #for posting a file try: import MultipartPostHandler #for posting a file,need installed self.opener.add_handler(MultipartPostHandler.MultipartPostHandler()) except NameError as e:print e self.response=None self.request=None self.header=[] def add_header(self,k,v) : self.header.append((k,v)) def build_request(self,url,params=None): self.request=Request(url,params) if not self.response is None:self.request.add_header('Referer',self.url()) #self.request.add_header('User-Agent', # 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 \ # (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25') #NokiaE63/UC Browser7.9.0.102/28/355/UCWEB #self.request.add_header('User-Agent','NokiaE63/UC Browser7.9.0.102/28/355/UCWEB') self.request.add_header('User-Agent','Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15') while self.header : _k,_v = self.header.pop() self.request.add_header(_k,_v) #Mobile/7B405 #self.request.add_header('User-Agent','Mobile/7B405') return self.request def __del__(self) : self.save_cookie() def urlopen(self,req): retime=self.retime while retime > 0: try: return self.opener.open(req,timeout=self.timeout) except Exception as e: retime -= 1 traceback.print_exc(file=sys.stdout) print 'Wait and retry...%d'%(self.retime-retime) sleep(self.sleept) def savefile(self,filename,url): self.response=self.urlopen(self.build_request(url)) CHUNK = 50 * 1024 with open(filename, 'wb') as fp: while True: chunk = self.response.read(CHUNK) if not chunk: break fp.write(chunk) def post(self,url,post_data): self.response=self.urlopen(self.build_request(url,urlencode(post_data))) return self.response def post_raw(self,url,post_data): self.response=self.urlopen(self.build_request(url,post_data)) return self.response def post_file(self,url,params): self.response=self.urlopen(self.build_request(url, params)) return self.response def get(self,url): self.response=self.urlopen(self.build_request(url)) #import urllib #print urllib.urlopen('http://mrozekma.com/302test.php').geturl() # import requests # r=requests.get(url) # print r.content return self.response def text(self,dec='gbk',enc='utf') : return self.response.read().decode(dec).encode(enc) def url(self) : return self.response.url def logout(self) : self.cookiejar.clear() def Verify_proxy(self) : pass def show_cookie(self): #print self.cookiejar for i in self.cookiejar: print i def save_cookie(self): # if hasattr(self.cookiejar,'save'):#in case non cookiejar # self.cookiejar.save(ignore_discard=True, ignore_expires=False) try: self.cookiejar.save(ignore_discard=True, ignore_expires=False) except Exception as e: traceback.print_exc(file=sys.stdout)
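# LSession above seeds a brand-new cookies file with the Netscape-format header
# line so that the subsequent load() does not fail on an empty file. That idiom
# in isolation (Python 3; the file name is illustrative):
import os
from http.cookiejar import MozillaCookieJar

cookie_file = 'session_cookies.txt'
jar = MozillaCookieJar(filename=cookie_file)
if not os.path.isfile(cookie_file):
    with open(cookie_file, 'w') as f:
        f.write(MozillaCookieJar.header)          # "# Netscape HTTP Cookie File" preamble
jar.load(filename=cookie_file, ignore_discard=True)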
def gen_login_cookie():
    cookie = MozillaCookieJar()
    cookie.load('cookies.txt', ignore_discard=True, ignore_expires=True)
    return cookie
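# Possible usage of gen_login_cookie() above: requests accepts a CookieJar
# directly through the cookies argument, so the saved login travels with the call.
# The URL is illustrative.
import requests

resp = requests.get('http://example.com/dashboard', cookies=gen_login_cookie())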
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041209_20200626T041234_033183_03D816_7063.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041144_20200626T041209_033183_03D816_0D5E.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041208_20200614T041233_033008_03D2C4_2DC4.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041143_20200614T041208_033008_03D2C4_584D.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041208_20200602T041233_032833_03CD92_6A43.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041143_20200602T041208_032833_03CD92_5A25.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." .format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! 
self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! 
try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. 
if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. 
file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. \n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! 
elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( "--------------------------------------------------------------------------------" )
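# The downloader above keeps session cookies by giving discardable cookies an
# explicit expiry and saving with ignore_discard=True, so re-authentication is
# not needed on the next run. That trick in isolation (Python 3):
import time
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar()
# ... after authenticating, the jar may contain session-only cookies ...
for cookie in jar:
    if cookie.discard:
        cookie.expires = int(time.time()) + 60 * 60 * 24 * 30   # keep for ~30 days
jar.save('.bulk_download_cookiejar.txt', ignore_discard=True, ignore_expires=True)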
class Json_RPC(object): def __init__(self): #self.cookie_jar=CookieJar() self.cookie_jar=MozillaCookieJar() self.opener=urllib2.build_opener( urllib2.HTTPCookieProcessor(self.cookie_jar), #urllib2.HTTPHandler(debuglevel=1), #urllib2.HTTPSHandler(debuglevel=1), ) def load_cookie(self,filename): ''' Load Cookie from file ''' self.cookie_jar.load(filename,ignore_discard=True) def save_cookie(self,filename): ''' Save Cookie to file ''' self.cookie_jar.save(filename,ignore_discard=True) def json_rpc(self,url,method="GET",**kwargs): ''' Performs a json rpc to url and return python-native result will extract dict or list from result Example: try{callback({'result':0,'data':[]});}catch(e){} will be transcode to {"result":0,"data":[]} See also: http_rpc ''' ret=self.http_rpc(url,method,**kwargs) ret=sub(r'try{(.*)}catch\(.*\){.*};?',r'\1',ret) ret=(search(r'{.+}',ret) or search(r'\[.+\]',ret)).group() #ret=sub(r"'",r'"',ret) ret=loads(ret) return ret def http_rpc(self,url,method="GET",**kwargs): ''' Perfoms a http rpc to url and return raw result url base url to rpc method 'GET' or 'POST' query query string passing by a dict data post data passing by a dict file post files passing by a list of 3-tuple: key, filename, data ( this indicates multipart/form-data ) ''' kwe=Entity(kwargs) if method not in ['GET','POST']: raise RPCError("Method not in GET or POST") if kwe.query: url+="?"+urlencode(kwe.query) if method=='GET': request=Request(url) elif kwe.file: content_type,data=multipart_encode(kwe.data,kwe.file) request=Request(url,data) request.add_header('Content-Type', content_type) elif kwe.data: data=urlencode(kwe.data) request=Request(url,data) else: raise RPCError("POST with no data") request.add_header('User-Agent', "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:8.0) Gecko/20100101 Firefox/8.0" ) request.add_header('Accept-Charset',"UTF-8") response=self.opener.open(request) ret=response.read() response.close() #print "\033[33m"+str(self.cookie_jar)+"\033[0m" # FIXME: An Ugly hack to Tencent server's charset indicator using BOM header if ret.startswith('\xef\xbb\xbf'): ret=ret[3:] return ret
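# Json_RPC.json_rpc() above unwraps a JSONP-style "try{callback(...)}catch" response
# before decoding it. A standalone sketch of that unwrapping (the sample payload is
# the one quoted in the docstring; the single-to-double quote fix is added so the
# sample actually parses):
import re
from json import loads

raw = "try{callback({'result':0,'data':[]});}catch(e){}"
body = re.sub(r'try{(.*)}catch\(.*\){.*};?', r'\1', raw)
match = re.search(r'{.+}', body) or re.search(r'\[.+\]', body)
payload = match.group().replace("'", '"')
print(loads(payload))          # {'result': 0, 'data': []}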
    c = {}
    for v in args:
        if type(v) == type({}):
            c.update(v)
    return c


user_agent = {'User-agent': "Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"}

cookie_file = "/dev/shm/urllib2_cookies.txt"
policy = DefaultCookiePolicy()

global _cookieJar
_cookieJar = MozillaCookieJar(cookie_file, policy)
if os.path.exists(cookie_file):
    _cookieJar.load()

DEBUG_LEVEL = 2
_http = urllib2.HTTPHandler()
_http.set_http_debuglevel(DEBUG_LEVEL)
_https = urllib2.HTTPSHandler()
_https.set_http_debuglevel(DEBUG_LEVEL)
_cookies = urllib2.HTTPCookieProcessor(_cookieJar)
urllib2.install_opener(urllib2.build_opener(_http, _https, _cookies))

ajax_header = {"X-Requested-With": "XMLHttpRequest"}
json_header = {"Accept": "application/json, text/javascript, */*; q=0.01"}
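
# Illustrative sketch (not part of the original source): once the opener above has been
# installed with install_opener, plain urllib2 calls share _cookieJar, and saving the jar
# persists the session across runs. The URL below is a placeholder.
headers = {}
headers.update(user_agent)
headers.update(ajax_header)
request = urllib2.Request('http://example.com/', headers=headers)
response = urllib2.urlopen(request)      # goes through the installed cookie-aware opener
body = response.read()
_cookieJar.save()                        # write any new cookies back to cookie_file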
class BasisRetr:
    """The main entry points, once a BasisRetr object has been created, are:
    1) GetDayData() -- download metrics, activity, and sleep data for a single day from the Basis website and save it
    2) GetActivityCsvForMonth() -- download activity summaries for an entire month
    3) GetSleepCsvForMonth() -- download sleep summaries for an entire month."""

    LOGIN_URL = 'https://app.mybasis.com/login'
    METRICS_URL = 'https://app.mybasis.com/api/v1/metricsday/me?day={date}&padding=0&bodystates=true&heartrate=true&steps=true&calories=true&gsr=true&skin_temp=true&air_temp=true'
    ACTIVITIES_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=run,walk,bike,sleep'
    SLEEP_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=sleep'
    SLEEP_EVENTS_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?type=sleep&event.type=toss_and_turn&expand=activities.stages,activities.events'

    def __init__(self, loadconfig=None):
        # create config info
        self.app_state = Config(defaults=DEFAULTS, fpath=STATE_FILEPATH)
        self.has_error = False
        if loadconfig:
            self.app_state.Load()
        else:
            # if config file doesn't exist, save the defaults loaded above
            self.app_state.Save()
        self.CFG = Config2()
        err_text = self.CFG.Parse(CFG_FILEPATH)
        if err_text:
            print 'Config file read error: ' + err_text
        # url opener for website retrieves
        self.cj = MozillaCookieJar(self.CFG.cookie_filename)
        self.session_cookie = None
        if os.path.exists(self.CFG.cookie_filename):
            self.cj.load()
            self.CheckSessionCookie()  # set session cookie if it exists and hasn't expired
        # need to use build_opener to submit cookies and post form data
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))

    def GetDayData(self, yr, mo, day, typ, save_csv, override_cache=False, act_metr=True):
        """Main entry method for getting a day's worth of data, formatting it, then saving it.
        typ is the type of data: metrics, activities, or sleep. Data is always saved in json
        format, but if save_csv is True, save to csv as well as json. override_cache ignores
        any already-downloaded json. act_metr, if True, saves sleep and activity state along
        with metrics."""
        date = self.YrMoDyToString(yr, mo, day)
        # Need yesterday's date to get sleep events for a given calendar day. This is because
        # sleep events, as downloaded from the Basis website, start from the prior evening,
        # when you actually went to sleep.
        ydate = self.YrMoDyToString(*self.GetYesterday(yr, mo, day))
        self.Status("Checking Login")
        if not self.CheckLogin():  # ensure we're logged in
            return False
        self.Status("getting {} for {}".format(typ, date))

        # figure out which data to get
        data = None
        # automatically override cache if day is incomplete (doesn't have 24 hrs of data)
        if not self.DayMetricsJsonIsComplete(date):
            override_cache = True
        # if needed, download json data from website and save to file
        if typ == 'metrics':
            mjdata = self.RetrieveJsonOrCached(date, 'metrics', override_cache)
            if not mjdata:  # there was an error
                return False
            ### MOVE THIS ERROR CHECKING INTO THE ABOVE METHOD
            if type(mjdata) == str or mjdata == None:  # simple error checking
                self.Status('OnGetDayData: Metrics json conversion failed.')
                return False
        # also load up activities
        if typ == 'activities' or act_metr:
            ajdata = self.RetrieveJsonOrCached(date, 'activities', override_cache)
            if type(ajdata) == str or ajdata == None:  # simple error checking
                self.Status('OnGetDayData: Activities json conversion failed.')
                return False
        if typ == 'sleep' or act_metr:
            sjdata = self.RetrieveJsonOrCached(date, 'sleep', override_cache)
            if type(sjdata) == str or sjdata == None:  # simple error checking
                self.Status('OnGetDayData: Sleep json conversion failed.')
                return False
            if act_metr:  # add yesterday's sleep data
                sjdata2 = self.RetrieveJsonOrCached(ydate, 'sleep')

        # Next, turn the list of python objects into a csv file.
        # If asked to (via act_metr), collect sleep and activity type, then add them to each timestamp.
        cdata = None
        if save_csv:
            if typ == 'activities' or act_metr:
                act_list = self.JsonActivitiesToList(ajdata)
                cdata = self.CreateCSVFromList(self.CFG.csv.activity_colnames, act_list)
            if typ == 'sleep' or act_metr:
                sleep_evts_list = self.JsonSleepEventsToList(sjdata)
                cdata = self.CreateCSVFromList(self.CFG.csv.sleep_evt_colnames, sleep_evts_list)
                if act_metr:
                    # prepend yesterday's sleep events as they may start before midnight.
                    sleep_evts_list[:0] = self.JsonSleepEventsToList(sjdata2)
            if typ == 'metrics':
                if u'error' in mjdata:
                    err = mjdata[u'error']
                    self.Status("HTTP response error ({}, # {}): {}".format(err[0], mjdata[u'code'], err[1]))
                    return
                metrics_list = self.JsonMetricsToList(mjdata)
                if act_metr:  # add activities to metrics
                    self.AddActivityTypeToMetrics(metrics_list, act_list, sleep_evts_list)
                    header = self.CFG.csv.metrics_colnames + self.CFG.csv.activity_type_colnames
                else:
                    header = self.CFG.csv.metrics_colnames
                cdata = self.CreateCSVFromList(header, metrics_list)
        # If we were able to make a csv file, save it.
        if cdata:
            fpath = self.PathForFile(date, typ, 'csv')
            fname = os.path.split(fpath)[1]
            self.SaveData(cdata, fpath)
            self.Status("Saved " + typ + " csv file " + fname)
        return True  # success

    def PathForFile(self, dt, typ, fmt):
        cfname = self.CFG.day_fname_template.format(date=dt, typ=typ, fmt=fmt)
        folder = self.app_state.savedir if fmt == 'csv' else self.GetJsonStorageDir()
        fpath = os.path.join(os.path.abspath(folder), cfname)
        return fpath

    ##
    ## TODO: How to deal with a sync that reaches back before you registered with Basis?
    ##
    def Sync(self, do_csv, override_cache, act_metr=True, callback=None):
        """Secondary entry point. Catch up to the current day. Downloads any missing or
        incomplete days, going back self.app_state.sync days."""
        # Download what we have for today. It won't be complete, but you can at least get the data.
        today = datetime.date.today()
        yr, mo, dy = today.year, today.month, today.day
        file_count = 0  # tally # of files actually changed
        if not self.CheckLogin():  # make sure we're logged in correctly before starting
            return
        for days in range(self.CFG.sync_days):
            # see if files already exist
            dt = self.YrMoDyToString(yr, mo, dy)
            self.Status('Sync: checking ' + dt)
            fpath = self.PathForFile(dt, 'metrics', 'csv')
            # if the file doesn't exist or the day is incomplete, download it
            if not os.path.isfile(fpath) or not self.DayMetricsJsonIsComplete(dt):
                # download day.
                # if override_cache is True, then we would always re-download all days. Don't let that happen.
                if not self.GetDayData(yr, mo, dy, 'metrics', do_csv, override_cache=False, act_metr=act_metr):
                    return  # quit if problem
                file_count += 1
            if callable(callback):
                # callback (if available) to UI manager to prevent freeze
                callback(yr, mo, dy)
            # loop change: yesterday.
            yr, mo, dy = self.GetYesterday(yr, mo, dy)
        # Done. Let user know.
        self.Status('Sync done; {} files updated'.format(file_count if file_count > 0 else 'no'))

    def CheckLogin(self):
        """Check to see if login is needed; if so, log in."""
        elapsed_hr = (time.time() - self.app_state.login_timestamp) / 3600
        if self.CheckSessionCookie() and self.app_state.session_token and elapsed_hr < self.CFG.login_timeout_hrs:
            success = True
        else:
            try:
                self.Login()
                success = True
            except Exception, v:
                self.Status('Login difficulty: ' + repr(v[0]))
                success = False
        if success:
            self.app_state.login_timestamp = time.time()
        return success
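
# Illustrative sketch (not part of the original source): driving the two entry points
# described in the BasisRetr docstring. The date and argument values are assumptions.
retr = BasisRetr(loadconfig=True)
# one specific day, saved as json + csv, with activity/sleep state merged into the metrics
retr.GetDayData(2014, 7, 15, 'metrics', save_csv=True, act_metr=True)
# or catch up on recent missing/incomplete days
retr.Sync(do_csv=True, override_cache=False)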
class AOJClient(object):

    def __init__(self, cookie_file_path='aoj-cookie.txt'):
        self.cookie_file_path = cookie_file_path
        self.cookiejar = MozillaCookieJar()
        if os.path.isfile(cookie_file_path):
            self.cookiejar.load(cookie_file_path)
        self.opener = urllib2.build_opener(
            urllib2.HTTPRedirectHandler(),
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(self.cookiejar))

    def get_csrf_token(self, url):
        request = urllib2.Request(url=url)
        response = self.opener.open(request)
        data = response.read()
        return REGEXP_CSRF.findall(data)[0]

    def refresh_session(self):
        print 'Not Logged In!'
        context = {'csrfmiddlewaretoken': self.get_csrf_token(LOGIN_URL),
                   'username': raw_input('Username: '),
                   'password': getpass.getpass('Password: ')}
        request = urllib2.Request(url=SITE_PREFIX + 'accounts/login/',
                                  data=urllib.urlencode(context))
        self.opener.open(request)
        self.cookiejar.save(self.cookie_file_path)

    def check_problem_exist(self, problem_name):
        try:
            request = urllib2.Request(url=PROB_PREFIX + 'read/' + problem_name)
            response = self.opener.open(request)
        except urllib2.HTTPError as err:
            if err.code == 404:  # Not Found
                raise AOJProblemNotExist
            else:
                raise

    def detect_language(self, source_file):
        if '.' in source_file:
            selected_language = source_file[source_file.rfind('.') + 1:]
        else:
            selected_language = ''
        while selected_language not in LANGUAGES:
            selected_language = raw_input('Please select your language: (' +
                                          '/'.join(LANGUAGES) + ') ? ').strip().lower()
        return selected_language

    def submit(self, submission):
        self.check_problem_exist(submission.problem)

        context = {}
        context['language'] = self.detect_language(submission.source)
        context['csrfmiddlewaretoken'] = self.get_csrf_token(url=PROB_PREFIX + 'submit/' + submission.problem)
        try:
            with open(submission.source) as f:
                context['source'] = f.read()
        except IOError:
            raise AOJFileNotExist()

        def try_submit(first=True):
            if not first:
                self.refresh_session()
            request = urllib2.Request(url=PROB_PREFIX + 'submit/' + submission.problem,
                                      data=urllib.urlencode(context))
            response = self.opener.open(request)
            if not response.geturl().lower().startswith(LOGIN_URL):
                print 'Submission Complete!'
                return
            try_submit(first=False)

        try_submit()

    def get_submission_list(self, problem_name):
        self.check_problem_exist(problem_name)

        request = urllib2.Request(url=SITE_PREFIX + 'judge/submission/recent/?problem=' + problem_name)
        response = self.opener.open(request)

        try:
            import lxml.html
        except ImportError:
            print 'lxml library is needed for parsing HTML'
            return

        html = lxml.html.fromstring(unicode(response.read().decode('utf8')))
        context = {}
        fields = ('id', 'problem', 'user', 'language', 'length', 'state', 'stats', 'submitted_on')
        length = {'id': 9, 'problem': 15, 'user': 15, 'language': 5, 'length': 7,
                  'state': 15, 'stats': 7, 'submitted_on': 15}
        template = u'%(id)s %(problem)s %(user)s %(language)s %(length)s %(state)s %(stats)s %(submitted_on)s'

        def width(string):
            return sum(1 + (unicodedata.east_asian_width(c) in 'WF') for c in string)

        for tr in html.cssselect('table.submission_list tr'):
            for field in fields:
                element = tr.find_class(field)
                if element:
                    context[field] = unicode(element[0].text_content().strip())
                else:
                    context[field] = u''
                context[field] = ' ' * (length[field] - width(context[field])) + context[field]
            print template % context
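
# Illustrative sketch (not part of the original source): the submission object passed to
# submit() above is assumed to expose .problem and .source; a minimal stand-in is shown.
# The problem name and source path are placeholders.
class Submission(object):
    def __init__(self, problem, source):
        self.problem = problem   # problem name on the judge
        self.source = source     # path to the local source file

client = AOJClient(cookie_file_path='aoj-cookie.txt')
client.submit(Submission('hello_world', 'solution.py'))   # prompts for login if the session expired
client.get_submission_list('hello_world')                 # prints a formatted recent-submission table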