Example #1
import urllib2  # HTTP client library, commonly used for crawlers
import cookielib  # provides objects that can store cookies, used together with urllib2 to access Internet resources

# http proxy
# Some sites track how many times a given IP visits within a time window, and
# block you if there are too many requests. You can therefore route traffic
# through proxy servers, switching to a different proxy every so often to
# avoid the ban. A proxy server is configured as follows:
proxy_server = {"http": 'http://some-proxy.com:8080'}
proxy_handler = urllib2.ProxyHandler(proxy_server)
proxy_opener = urllib2.build_opener(proxy_handler)
urllib2.install_opener(proxy_opener)
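
# A minimal rotation sketch (not in the original snippet), assuming a
# hypothetical pool of proxy URLs; each call installs a different proxy,
# which is one simple way to "switch every so often" as described above.
import random

proxy_pool = ['http://some-proxy.com:8080', 'http://other-proxy.com:3128']  # hypothetical addresses

def install_random_proxy():
    proxy = {"http": random.choice(proxy_pool)}
    urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(proxy)))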

# cookie configuration
# CookieJar --derives--> FileCookieJar --derives--> MozillaCookieJar and LWPCookieJar
#cookie = cookielib.CookieJar() # declare a CookieJar instance to hold the cookies in memory
cookie = cookielib.MozillaCookieJar(
    'cookies/baidu_cookie.txt')  # declare a MozillaCookieJar instance to hold the cookies and later write them to this file
#cookie.load('cookies/baidu_cookie.txt', ignore_discard=True, ignore_expires=True) # read the cookie contents from the file into the jar
cookie_handler = urllib2.HTTPCookieProcessor(
    cookie)  # use urllib2's HTTPCookieProcessor to create a cookie handler
cookie_opener = urllib2.build_opener(cookie_handler)  # build the opener from the handler
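
# Note: install_opener(proxy_opener) above does not make cookie_opener use the
# proxy; the two openers are independent. build_opener accepts several
# handlers, so a single opener can combine both:
combined_opener = urllib2.build_opener(proxy_handler, cookie_handler)
urllib2.install_opener(combined_opener)  # urllib2.urlopen now uses proxy + cookies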

# Request parameter configuration
url = "https://www.baidu.com"
#url = "https://passport.baidu.com/v2/api/?login"
#url = "https://passport.csdn.net/account/login"
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'Referer': url,  # assumed value -- the original snippet breaks off at this key
}
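
# A minimal continuation sketch (not part of the original snippet): send the
# request through the installed opener and persist the cookies to disk.
request = urllib2.Request(url, headers=headers)
response = urllib2.urlopen(request)  # goes through the installed proxy+cookie opener
cookie.save(ignore_discard=True, ignore_expires=True)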
Example #2
    def login(self):
        try:
            cookiejar = cookielib.MozillaCookieJar()
            self.opener = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(cookiejar))
            #            cookie_support= urllib2.HTTPCookieProcessor(cookielib.CookieJar())
            #            self.opener = urllib2.build_opener(cookie_support)
            self.opener.addheaders = [('User-agent', 'Opera/9.23')]
            urllib2.install_opener(self.opener)
            url1 = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=' + \
                  'sinaSSOController.preloginCallBack&su=c2h1aW11Xzg4JTQwMTYzLmNvbQ%3D%3D' + \
                  '&client=ssologin.js(v1.3.17)&_=' + str(time.time()).split('.')[0]
            url2 = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.3.17)'

            header1 = {
                #'Host': 'login.sina.com.cn',
                'Host':
                'weibo.com',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0',
                'Accept':
                '*/*',
                'Accept-Language':
                'zh-cn,zh;q=0.5',
                'Accept-Encoding':
                'gzip, deflate',
                'Accept-Charset':
                'GB2312,utf-8;q=0.7,*;q=0.7',
                'Connection':
                'keep-alive',
                'Referer':
                'http://weibo.com/',
                'Cookie':
                'SUS=SID-1789744932-1325485823-JA-ab6v3-825d71755a572f8423b7abbd7a8674b4; SUE=es%3D0c9e0ff431d182656f826475eb0dfa43%26ev%3Dv1%26es2%3D152ef8d9ca7753718fc83db371ee72ff%26rs0%3DDgKaAcjk3lwVy5kPC5dnIiNo3YUrEtWRRtvJ2JPNAzFWIEw3u3hX%252FbvwWNCJnEeyPICk%252B9J0ZjSup9vVgqJCOL%252B%252FUFztxT69u7gTnqHx7pxkM7CypI5pQF7ah71N5GvK6F4lPsvD44JkS8p%252FcdyezraMt8yyU5MsB%252B397U2LRUo%253D%26rv%3D0; SUP=cv%3D1%26bt%3D1325485823%26et%3D1325572223%26d%3Dc909%26i%3D13b0%26us%3D1%26uid%3D1789744932%26user%3Dshuimu_88%2540163.com%26ag%3D4%26name%3Dshuimu_88%2540163.com%26nick%3Dguge%26fmp%3D%26lcp%3D2011-08-04%252015%253A41%253A22%26vf%3D0%26ac%3D2; ALF=1326090618; SSOLoginState=1325485823; wvr=3.6; USRHAJAWB=usrmdins13121; [email protected]; USRHAWB=usrmdins212_542; ads_ck=1; _s_tentry=weibo.com; UOR=weibo.com,weibo.com,; Apache=8430538139278.456.1325485892311; SINAGLOBAL=8430538139278.456.1325485892311; ULV=1325485892540:1:1:1:8430538139278.456.1325485892311:'
            }

            header2 = {
                'Host': 'login.sina.com.cn',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0',
                'Accept':
                'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'zh-cn,zh;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
                'Connection': 'keep-alive',
                'Referer': 'http://weibo.com/',
                'Content-Type': 'application/x-www-form-urlencoded',
                'Content-Length': '378'
            }

            header3 = {
                'Host': 'weibo.com',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0',
                'Accept': '*/*',
                'Accept-Language': 'zh-cn,zh;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
                'Connection': 'keep-alive',
                'Referer': url2
            }

            req1 = urllib2.Request('http://weibo.com/whitefoxx', None, header1)
            #self.opener.open('http://weibo.com/')
            #req1 = urllib2.Request('http://')
            fs = self.opener.open(req1)
            #            fs = urllib2.urlopen(req1)

            hdoc = fs.read()
            print hdoc
            return  # early debug return left in place -- nothing below this line executes
            #            buf = StringIO.StringIO(hdoc)
            #            f = gzip.GzipFile(fileobj=buf)
            #            hdoc = f.read()
            hdoc = hdoc.split('(')[1].split(')')[0]
            items = json.loads(hdoc)
            servertime = items['servertime']
            nonce = items['nonce']
            sp = self.hash_password('zcgyb0668', servertime, nonce)
            print sp

            postdata = {
                'entry': 'weibo',
                'gateway': '1',
                'from': '',
                'savestate': '7',
                'useticket': '1',
                'ssosimplelogin': '******',
                'vsnf': '1',
                'vsnval': '',
                'su': 'c2h1aW11Xzg4JTQwMTYzLmNvbQ==',
                'service': 'miniblog',
                'servertime': str(servertime),
                'nonce': nonce,
                'pwencode': 'wsse',
                'sp': sp,
                'encoding': 'UTF-8',
                'url':
                'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
                'returntype': 'META'
            }
            postdata = urllib.urlencode(postdata)
            req2 = urllib2.Request(url2, postdata)
            fs = self.opener.open(req2)
            #            fs = urllib2.urlopen(req2)
            print fs.read()
            fs.close()
            print cookiejar
            req3 = urllib2.Request(
                'http://weibo.com/u/1789744932?wvr=3.6&lf=reg')
            f = open('foxxcyb.html', 'w')
            fs2 = self.opener.open(req3)
            #            fs2 = urllib2.urlopen(req3)
            f.write(fs2.read())
            fs2.close()
            f.close()

            print cookiejar
        except urllib2.URLError, e:
            print e
Example #3
    def getURLRequestData(self, params={}, post_data=None):
        def urlOpen(req, customOpeners):
            if len(customOpeners) > 0:
                opener = urllib2.build_opener(*customOpeners)
                response = opener.open(req)
            else:
                response = urllib2.urlopen(req)
            return response

        cj = cookielib.MozillaCookieJar()

        response = None
        req = None
        out_data = None
        opener = None

        if 'host' in params:
            host = params['host']
        else:
            host = self.HOST

        if 'header' in params:
            headers = params['header']
        elif self.HEADER is not None:
            headers = self.HEADER
        else:
            headers = {'User-Agent': host}

        if dbg == 'true':
            log.info('pCommon - getURLRequestData() -> params: ' + str(params))
            log.info('pCommon - getURLRequestData() -> params: ' +
                     str(headers))

        customOpeners = []
        #cookie support
        if 'use_cookie' not in params and 'cookiefile' in params and (
                'load_cookie' in params or 'save_cookie' in params):
            params['use_cookie'] = True

        if params.get('use_cookie', False):
            customOpeners.append(urllib2.HTTPCookieProcessor(cj))
            if params.get('load_cookie', True):
                try:
                    cj.load(params['cookiefile'], ignore_discard=True)
                except:
                    pass

        if post_data is not None:
            if dbg == 'true':
                log.info('pCommon - getURLRequestData() -> post data: ' +
                         str(post_data))
            if params.get('raw_post_data', False):
                dataPost = post_data
            else:
                dataPost = urllib.urlencode(post_data)
            req = urllib2.Request(params['url'], dataPost, headers)
        else:
            req = urllib2.Request(params['url'], None, headers)

        if not params.get('return_data', False):
            out_data = urlOpen(req, customOpeners)
        else:
            gzip_encoding = False
            try:
                response = urlOpen(req, customOpeners)
                if response.info().get('Content-Encoding') == 'gzip':
                    gzip_encoding = True
                data = response.read()
                response.close()
            except urllib2.HTTPError, e:
                if e.code == 404:
                    if dbg == 'true':
                        log.info(
                            'pCommon - getURLRequestData() -> !!!!!!!! 404 - page not found handled'
                        )
                    if e.fp.info().get('Content-Encoding') == 'gzip':
                        gzip_encoding = True
                    data = e.fp.read()
                    #e.msg
                    #e.headers
                else:
                    #printExc()
                    raise

            try:
                if gzip_encoding:
                    if dbg == 'true':
                        log.info(
                            'pCommon - getURLRequestData() -> Content-Encoding == gzip'
                        )
                    buf = StringIO(data)
                    f = gzip.GzipFile(fileobj=buf)
                    out_data = f.read()
                else:
                    out_data = data
            except:
                out_data = data

        return out_data
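
# A hypothetical call sketch showing the cookie-related params this method
# reacts to ('url', 'cookiefile', 'load_cookie', 'return_data' all come from
# the code above; the file name is a placeholder):
#
#   data = self.getURLRequestData({'url': 'http://example.com/',
#                                  'cookiefile': '/tmp/site_cookies.txt',
#                                  'load_cookie': True,
#                                  'return_data': True})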
Example #4
# -*- coding: utf-8 -*-
import urllib
import urllib2
import cookielib

# values = {"username": "******", "password": "******"}
# data = urllib.urlencode(values)
# url = "http://127.0.0.1:8080/cba/"
# request = urllib2.Request(url, data)
# response = urllib2.urlopen(request)
#
# f = open(r'F:\PythonLearning\resource\就分.html'.decode('utf-8'), 'w')
# f.write(response.read())
cookie = cookielib.MozillaCookieJar(r'F:\PythonLearning\resource\workfile')
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
postdata = urllib.urlencode({'stuid': '201200131012', 'pwd': '23342321'})
# URL for logging in to the educational administration system
loginUrl = 'http://jwxt.sdu.edu.cn:7890/pls/wwwbks/bks_login2.login'
# simulate the login; the cookies end up in the jar
result = opener.open(loginUrl, postdata)
# save the cookies to the cookie file
cookie.save(ignore_discard=True, ignore_expires=True)
f = open(r'F:\PythonLearning\resource\就分.html'.decode('utf-8'), 'w')
f.write(result.read())
# use the cookies to request another URL -- the grade-query page
gradeUrl = 'http://jwxt.sdu.edu.cn:7890/pls/wwwbks/bkscjcx.curscopre'
# request the grade-query URL
result = opener.open(gradeUrl)
print result.read()
Example #5
  def __init__(self, host, auth_function, user_agent, source,
               host_override=None, extra_headers=None, save_cookies=False,
               auth_tries=3, account_type=None, debug_data=True, secure=True,
               ignore_certs=False, rpc_tries=3, options=None):
    """Creates a new HttpRpcServer.

    Args:
      host: The host to send requests to.
      auth_function: A function that takes no arguments and returns an
        (email, password) tuple when called. Will be called if authentication
        is required.
      user_agent: The user-agent string to send to the server. Specify None to
        omit the user-agent header.
      source: The source to specify in authentication requests.
      host_override: The host header to send to the server (defaults to host).
      extra_headers: A dict of extra headers to append to every request. Values
        supplied here will override other default headers that are supplied.
      save_cookies: If True, save the authentication cookies to local disk.
        If False, use an in-memory cookiejar instead.  Subclasses must
        implement this functionality.  Defaults to False.
      auth_tries: The number of times to attempt auth_function before failing.
      account_type: One of GOOGLE, HOSTED_OR_GOOGLE, or None for automatic.
      debug_data: Whether debugging output should include data contents.
      secure: If the requests sent using Send should be sent over HTTPS.
      ignore_certs: If the certificate mismatches should be ignored.
      rpc_tries: The number of rpc retries upon http server error (i.e.
        Response code >= 500 and < 600) before failing.
      options: the command line options (ignored in this implementation).
    """
    if secure:
      self.scheme = "https"
    else:
      self.scheme = "http"
    self.ignore_certs = ignore_certs
    self.host = host
    self.host_override = host_override
    self.auth_function = auth_function
    self.source = source
    self.authenticated = False
    self.auth_tries = auth_tries
    self.debug_data = debug_data
    self.rpc_tries = rpc_tries

    # TODO(user): Consider validating account_type?
    self.account_type = account_type

    self.extra_headers = {}
    if user_agent:
      self.extra_headers["User-Agent"] = user_agent
    if extra_headers:
      self.extra_headers.update(extra_headers)

    self.save_cookies = save_cookies
    # By default there are no cookies to use or save.
    self.cookie_jar = cookielib.MozillaCookieJar()
    self.opener = self._GetOpener()
    if self.host_override:
      logger.debug("Server: %s; Host: %s", self.host, self.host_override)
    else:
      logger.debug("Server: %s", self.host)

    # If we're being run against localhost, set the dev_appserver cookie.
    if ((self.host_override and self.host_override == "localhost") or
        self.host == "localhost" or self.host.startswith("localhost:")):
      self._DevAppServerAuthenticate()
Example #6
def login():
    try:
        '''get login viewstate'''
        login_url = 'http://jwc1.wtc.edu.cn/default3.aspx'
        login_headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0'
        }

        view = urllib2.urlopen(
            urllib2.Request(login_url, headers=login_headers)).read()
        soup = BeautifulSoup(view, "html.parser")
        tmp = soup.find('input', attrs={'name': '__VIEWSTATE'})
        viewstate = tmp['value']
        '''get login values'''
        StudentNo = raw_input("Enter your student number: ")
        PassWord = raw_input("Enter your password: ")

        login_data = urllib.urlencode({
            "__VIEWSTATE": viewstate,
            "TextBox1": StudentNo,
            "TextBox2": PassWord,
            "ddl_js": u'学生',
            "Button1": "+%B5%C7+%C2%BC+"
        })
        '''alternative: build the request directly with headers:
        request = urllib2.Request(login_url,login_data,login_headers)
        result = opener.open(request)
        Of the two login approaches, the one used below needs "opener.addheaders" to modify the HTTP headers
        '''
        mycookie = cookielib.MozillaCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(mycookie))
        '''login and get cookie'''
        opener.addheaders = [(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0'
        )]
        result = opener.open(login_url, login_data)
        '''find error'''
        soup = BeautifulSoup(result.read(), "html.parser")
        error = soup.find_all('script')
        source = error[0].get_text().encode("utf-8")

        PassWord_error = "密码错误!!"  # "wrong password!!" -- matched against the page text, so kept in Chinese
        PassWord_tmp = source.find(PassWord_error)
        StudentNo_error = "用户名不存在或未按照要求参加教学活动!!"  # "user does not exist or has not taken part in teaching activities as required!!"
        StudentNo_tmp = source.find(StudentNo_error)

        try:
            if PassWord_tmp != -1:
                sys.exit(0)  # SystemExit is caught by the bare except below, so a found error message triggers the retry path
        except:
            print PassWord_error
            print "Please try again -",
            main()

        try:
            if StudentNo_tmp != -1:
                sys.exit(0)
        except:
            print StudentNo_error
            print "Please try again -",
            main()
        '''get StudentName'''
        xs_main_url = "http://jwc1.wtc.edu.cn/xs_main.aspx?xh=" + StudentNo
        xs_main = opener.open(xs_main_url)
        soup = BeautifulSoup(xs_main.read(), "html.parser")
        tmp = soup.find(id="xhxm")
        StudentName = str(tmp.string.decode('gbk')[:-2])

        result_url = "http://jwc1.wtc.edu.cn/xscj_gc.aspx?xh=" + StudentNo + "&xm=" + StudentName + "&gnmkdm=N121605"
        viewstate_headers = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0",
            'Referer': xs_main_url,
        }
        result_headers = {
            'Referer':
            result_url,
            'user-Agent':
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0"
        }
        '''get result viewstate'''
        request_gra1 = urllib2.Request(result_url, headers=viewstate_headers)
        result = opener.open(request_gra1)
        soup = BeautifulSoup(result.read(), "html.parser")
        tmp = soup.find('input', attrs={'name': '__VIEWSTATE'})
        viewstate = tmp['value']
        '''get Inquiry mode'''
        Inquiry_mode = raw_input("Query by academic year (1) or by semester (2)? Enter 1 or 2: ")
        if Inquiry_mode == '1':
            Inquiry_mode = '1'
            Button = 'Button5'
            Value = '按学年查询'  # "query by academic year" -- form value the server expects
            Semester = ''
        elif Inquiry_mode == '2':
            Inquiry_mode = '2'
            Semester = raw_input("Enter the semester (1 or 2): ")
            Button = 'Button1'
            Value = '按学期查询'  # "query by semester" -- form value the server expects
        else:
            print "Please enter 1 or 2"
            main()
        Interval = raw_input("Enter the academic year range, e.g. 2015-2016: ")

        result_data = urllib.urlencode({
            '__VIEWSTATE': viewstate,
            'ddlXN': Interval,
            'ddlXQ': Semester,
            Button: Value
        })
        '''login and get result then return'''
        result = urllib2.Request(result_url, result_data, result_headers)
        result = opener.open(result)
        return result.read()
    except urllib2.URLError, e:
        if hasattr(e, "code"):
            return e.code
Example #7
def get_response_and_text(url,
                          headers=None,
                          needupdate=False,
                          update_info=None):
    if headers:
        this_headers = headers
    else:
        this_headers = {
            'User-Agent':
            'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
        }
    num_reply = 5
    while True:  # force the request: keep retrying until it succeeds
        try:
            timea = time.time()
            cookies1 = cookielib.MozillaCookieJar()
            proxies1 = {'http': 'http://' + get_proxy_from_redis()}
            proxyhandler = urllib2.ProxyHandler(proxies1)
            cookiehandler = urllib2.HTTPCookieProcessor(cookies1)
            request1 = urllib2.Request(url=url, headers=this_headers)
            opener1 = urllib2.build_opener(proxyhandler, cookiehandler)
            response_in_function = opener1.open(request1,
                                                timeout=timeout_value)
            response_in_function_text = response_in_function.read()
            if response_in_function.code == 204:
                num_reply -= 1
                if num_reply < 0:
                    sys.exit()
                else:
                    raise Exception

            if needupdate:
                file1 = BASIC_FILE + '/chengdu/chengdu_sechdule.text'
                sechdule = update_info['page_num']
                with open(file1, 'w') as fl:
                    fl.write(str(sechdule))  # write() needs a string, not an int

            break
        except Exception as e:
            if hasattr(e, 'code'):
                if e.code in [404, 400]:
                    opener1.close()
                    sys.exit()
                elif e.code in [204, 403]:  # there may be data, but access was blocked
                    num_reply -= 1
                    opener1.close()
                    if num_reply < 1:
                        sys.exit()

    timeb = time.time()
    proxy_here = proxies1.values()[0].split('//')[1]
    opener1.close()
    if timeb - timea < 10:
        proxy_sendback(proxy_here)
    if response_in_function.code == 204:
        return {'response_in_function': None, 'response_in_function_text': {}}
    return {
        'response_in_function': response_in_function,
        'response_in_function_text': response_in_function_text
    }
Example #8
def post_machine(t, j):
    pre = 'learn.tsinghua.edu.cn\tFALSE\t/\tFALSE\t\t'  # tab-separated fields of the Netscape cookie format
    f = open('cookies.txt', 'wb')
    f.write('# Netscape HTTP Cookie File\n')  # the header needs its own line
    f.write(''.join([pre, 'JSESSIONID', '\t', j, '\n']))
    f.write(''.join([pre, 'THNSV2COOKIE', '\t', t, '\n']))
    f.close()

    cjar = cookielib.MozillaCookieJar()
    cjar.load('cookies.txt', ignore_discard=True, ignore_expires=True)
    #print cjar
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cjar))
    opener.addheaders = [
        ('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')
    ]
    urllib2.install_opener(opener)
    domain = 'http://learn.tsinghua.edu.cn'
    login_page = "".join(
        [domain, '/MultiLanguage/lesson/teacher/loginteacher.jsp'])
    userName = "******"
    password = "******"
    page = opener.open(login_page,
                       urllib.urlencode({
                           'userid': userName,
                           'userpass': password
                       }))  #POST
    cjar.save('cookies.txt', ignore_discard=True, ignore_expires=True)
    print cjar
    try:
        #get list of courses
        page = opener.open("".join(
            [domain, '/MultiLanguage/lesson/student/MyCourse.jsp?typepage=2']))
        soup = BeautifulSoup(page.read())
        course = [
            soup.findAll(attrs={'class': 'info_tr'}),
            soup.findAll(attrs={'class': 'info_tr2'})
        ]
        bbs_url = 'http://learn.tsinghua.edu.cn/MultiLanguage/public/bbs/bbs_talk_submit.jsp?post_par_id=0000&post_up_url=talk_list_student.jsp&post_cate_id=1'
        post_title = 'Americans%20attack%20Tsinghua%20network%3F'
        post_detail = 'Yes%2C%20we%20scan%21'
        bbs_url = "".join([
            bbs_url, '&post_title=', post_title, '&post_detail=', post_detail
        ])
        count = 0
        for c in course:
            course_id = c[0].td.a['href'][58:]
            #get post_bbs_id
            page = opener.open(''.join([
                domain,
                '/MultiLanguage/public/bbs/gettalkid_student.jsp?course_id=',
                course_id
            ]))
            soup = BeautifulSoup(page.read())
            new_url = soup.find(attrs={'id': 'new_url'})['href']
            post_bbs_id = new_url[52:new_url.find('&', 52)]
            POST_url = "".join([
                bbs_url, '&course_id=', course_id, '&post_bbs_id=', post_bbs_id
            ])
            #post bbs
            opener.open(POST_url)
            print course_id, "done!"
            count = count + 1
            if count > 2:
                break
    except Exception, e:
        print str(e)
Example #9
def load_cookies():
    '''Simulate a browser login to Weibo and return the cookie string.
    '''
    mobile = WEIBO_USER
    password = WEIBO_PWD
    cookie_str = ''
    user_agent = '''Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us)
            AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4
            Mobile/7B334b Safari/531.21.10'''
    header = {'User-Agent': user_agent}
    cj = cookielib.MozillaCookieJar()
    if os.path.isfile(COOKIES_FILE):
        cj.load(COOKIES_FILE)
        #opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        cookie_list = []
        for cookie in cj:
            if cookie.domain == '.weibo.cn':
                cookie_list.append(str(cookie).split(' ')[1])
        cookie_str = ';'.join(cookie_list)  # join once, after the loop
        return cookie_str
    login_url = '''http://3g.sina.com.cn/prog/wapsite/sso/login.php?ns=1&backURL=http%3A%2F%2Fweibo.cn%2Fdpool%2Fttt%2Fhome.php%3Fs2w%3Dlogin&backTitle=%D0%C2%C0%CB%CE%A2%B2%A9&vt=4&wm=ig_0001_index'''
    res = urllib2.urlopen(urllib2.Request(login_url, headers=header))
    login_html = res.read()
    res.close()
    login_soup = BeautifulSoup(login_html)
    login_form_action = login_soup.find('form')['action']
    vk = pwd = submit = backURL = backTitle = None
    for input_box in login_soup.findAll('input'):
        if input_box['type'] == 'password':
            pwd = input_box['name']
        elif input_box['type'] == 'submit':
            submit = input_box['value']
        elif input_box['type'] == 'hidden':
            if input_box['name'] == 'vk':
                vk = input_box['value']
            elif input_box['name'] == 'backURL':
                backURL = input_box['value']
            elif input_box['name'] == 'backTitle':
                backTitle = input_box['value']
    submit = '%E7%99%BB%E5%BD%95'  # URL-encoded "登录" (login)
    params = urllib.urlencode({
        'mobile': mobile,
        pwd: password,
        'remember': 'on',
        'backURL': backURL,
        'vk': vk,
        'submit': submit
    })
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    submit_url = 'http://3g.sina.com.cn/prog/wapsite/sso/' + login_form_action
    res = opener.open(urllib2.Request(submit_url, headers=header), params)
    redirect_html = res.read()
    res.close()
    redirect_soup = BeautifulSoup(redirect_html)
    redirect_url = redirect_soup.find('a')['href']
    res = opener.open(urllib2.Request(redirect_url, headers=header))
    res.close()
    cj.save(COOKIES_FILE, ignore_discard=True)
    cookie_list = []
    for cookie in cj:
        if cookie.domain == '.weibo.cn':
            cookie_list.append(str(cookie).split(' ')[1])
    cookie_str = ';'.join(cookie_list)  # join once, after the loop
    return cookie_str
Example #10
 def __init__(self):
     self.cookie = cookielib.MozillaCookieJar()
     self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
Example #11
# -*- coding: utf-8 -*-
import urllib, urllib2, cookielib, os

url = "http://go.cgddgc.cn/auth/login"
url1 = "http://go.cgddgc.cn/user/node"
valu = {
    "email": "*****@*****.**",
    "passwd": "cgd1011",
    "remember_me": "week"
}
data = urllib.urlencode(valu)
ckjar = cookielib.MozillaCookieJar("cookie.txt")
ckproc = urllib2.HTTPCookieProcessor(ckjar)
opener = urllib2.build_opener(ckproc)
request = urllib2.Request(url, data)
respon = opener.open(request)
respon = opener.open(url1)
result = respon.read()
respon.close()
ckjar.save()
print result
Example #12
    def __init__(self,
                 username=None,
                 password=None,
                 auth=None,
                 code=None,
                 datadir=None,
                 configdir=None,
                 cachedir=None,
                 debug=False):
        self.username = username
        self.password = password

        self.datadir = datadir or DATADIR
        self.configdir = configdir or CONFIGDIR
        self.cachedir = cachedir or CACHEDIR

        if not os.path.isdir(self.configdir):
            # Create the config dir as xdg would. Let exceptions bubble up
            os.makedirs(self.configdir, 0700)

        self.cookiejar = cookielib.MozillaCookieJar(
            filename=osp.join(self.configdir, "cookies.txt"))
        try:
            self.cookiejar.load()
        except (IOError, cookielib.LoadError) as e:
            log.error('Error reading cookies: %s', e)

        if auth:
            log.info("Injecting authenticated cookie")
            expires = int(auth.split('|')[1]) + 730 * 24 * 60 * 60
            cookie = cookielib.Cookie(
                version=0,
                name='_simpleauth_sess',
                value=auth,
                port=None,
                port_specified=False,
                domain=urlsplit(self.url)[1],
                domain_specified=False,
                domain_initial_dot=False,
                path='/',
                path_specified=False,
                secure=True,
                expires=expires,
                discard=False,
                comment=None,
                comment_url=None,
                rest={},
            )
            self.cookiejar.set_cookie(cookie)

        super(HumbleBundle, self).__init__(self.url,
                                           tag=APPNAME,
                                           cookiejar=self.cookiejar,
                                           debug=debug)

        if code:
            log.info("Validating browser code at '%s/user/humbleguard'",
                     self.url)
            try:
                self.get("/user/humbleguard", {
                    'goto': "/home",
                    'qs': "",
                    'code': code.upper()
                })
            except httpbot.urllib2.HTTPError as e:
                raise HumbleBundleError("Incorrect browser verification code")

        # "purchases" in the website. May be non-bundle like Store Purchases
        self.bundles = {}
        # "subproducts" in json. May be not a game, like Soundtracks and eBooks
        self.games = {}

        # Load bundles and games
        try:
            with open(osp.join(self.configdir, "bundles.json")) as fp1:
                with open(osp.join(self.configdir, "games.json")) as fp2:
                    self.bundles = json.load(fp1)
                    self.games = json.load(fp2)
                    log.info("Loaded %d games from %d bundles",
                             len(self.games), len(self.bundles))
            self._merge()
        except IOError:
            self.update()
Example #13
def refresh_cookie():
    # create a MozillaCookieJar instance
    cookie = cookielib.MozillaCookieJar()
    # read the cookie contents from the file into the jar
    cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)
    return cookie
Example #14
# -*- coding: utf-8 -*-
import urllib2
import cookielib
__author__ = 'Anliven'

# Fetch the cookies from a file and use them for a request

readcookie = cookielib.MozillaCookieJar()  # create a MozillaCookieJar instance
readcookie.load('Spider_cookielib_2.txt',
                ignore_discard=True,
                ignore_expires=True)  # read the cookie contents from the file into the jar

req = urllib2.Request("http://www.cn.bing.com")  # build the request
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(
    readcookie))  # create an opener with urllib2's build_opener
response = opener.open(req)
print response.read()

# If the cookie file holds login cookies, loading it this way lets you replay that account's session.
# Typical flow: create a cookie-aware opener, visit the login URL, save the resulting cookies, then reuse them for other pages.
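
# A compact end-to-end sketch of that flow; the URLs and form fields below are
# hypothetical placeholders, not from the original example.
import urllib

login_cookie = cookielib.MozillaCookieJar('login_cookie.txt')
login_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(login_cookie))
login_opener.open('http://example.com/login',
                  urllib.urlencode({'user': 'u', 'pwd': 'p'}))  # POST the credentials
login_cookie.save(ignore_discard=True, ignore_expires=True)  # persist the session

saved = cookielib.MozillaCookieJar()  # later, possibly in another run
saved.load('login_cookie.txt', ignore_discard=True, ignore_expires=True)
reuse_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(saved))
print reuse_opener.open('http://example.com/profile').read()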
Example #15
# -*- coding: utf-8 -*-
import urllib, urllib2, re, requests, cookielib
from bs4 import BeautifulSoup

login_url="https://github.com/login"
'''
session=requests.session()
html=session.get(login_url,headers=header)
'''
html=urllib.urlopen(login_url)
soup=BeautifulSoup(html.read(),"lxml")
for inp in soup.form.find_all("input"):  # avoid shadowing the builtin 'input'
    if inp.get("name") == "authenticity_token":
        token = inp.get("value")
#print token
values = {'login': '******', 'password': '******', 'commit': 'Sign+in', 'authenticity_token': token, 'utf8': '%E2%9C%93'}
data = urllib.urlencode(values)
ckjar = cookielib.MozillaCookieJar('gitcookie.txt')
handler = urllib2.HTTPCookieProcessor(ckjar)
opener = urllib2.build_opener(handler)
opener.addheaders = [('Host', 'github.com'),  # Host must be a bare host name, not a URL
                     ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0'),
                     ('Referer', 'https://github.com')]
auth_url="https://github.com/session"
respon=opener.open(auth_url,data)
ckjar.save(ignore_discard=True,ignore_expires=True)
#respon=opener.open("https://github.com/")
print respon.read()
Example #16
def fetch_html_encoded_roles(
    adfs_host,
    adfs_cookie_location,
    ssl_verification_enabled,
    provider_id,
    adfs_ca_bundle=None,
    username=None,
    password=None,
    sspi=None,
    u2f_trigger_default=None,
):

    # Support for Kerberos SSO on Windows via requests_negotiate_sspi
    # also requires tricking the server into thinking we're using IE,
    # so that it serves up a redirect to the IWA page.
    if sspi:
        _headers[
            'User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'

    # Initiate session handler
    session = requests.Session()

    # LWPCookieJar has an issue on Windows when cookies have an 'expires' date too far in the future and they are converted from timestamp to datetime.
    # MozillaCookieJar works because it does not convert the timestamps.
    # Duo uses 253402300799 for its cookies which translates into 9999-12-31T23:59:59Z.
    # Windows 64bit maximum date is 3000-12-31T23:59:59Z, and 32bit is 2038-01-18T23:59:59Z.
    session.cookies = cookielib.MozillaCookieJar(filename=adfs_cookie_location)

    try:
        have_creds = (username and password) or _auth_provider
        session.cookies.load(ignore_discard=not (have_creds))
    except IOError as e:
        error_message = getattr(e, 'message', e)
        logging.debug(
            u'Attempt to load authentication cookies into session failed. '
            u'Re-authentication will be performed. '
            u'The error: {}'.format(error_message))

    if _auth_provider and sspi:
        domain = None
        if username:
            if '@' in username:  # User principal name (UPN) format
                username, domain = username.split('@', 1)
            elif '\\' in username:  # Down-level logon name format
                domain, username = username.split('\\', 1)

        if system() == 'Windows':
            auth = _auth_provider(username, password, domain)
        elif username and domain:
            auth = _auth_provider(principal="{}@{}".format(username, domain),
                                  mutual_authentication=OPTIONAL)
        else:
            auth = _auth_provider(mutual_authentication=OPTIONAL)
        data = None
    else:
        auth = None
        data = {
            'UserName': username,
            'Password': password,
            'AuthMethod': provider_id
        }

    if adfs_ca_bundle:
        ssl_verification = adfs_ca_bundle
    else:
        ssl_verification = ssl_verification_enabled

    # Opens the initial AD FS URL and follows all of the HTTP302 redirects
    authentication_url = _IDP_ENTRY_URL.format(adfs_host, provider_id)
    response = session.post(authentication_url,
                            verify=ssl_verification,
                            headers=_headers,
                            auth=auth,
                            data=data)

    logging.debug(u'''Request:
        * url: {}
        * headers: {}
    Response:
        * status: {}
        * headers: {}
        * body: {}
    '''.format(authentication_url, response.request.headers,
               response.status_code, response.headers, response.text))

    if response.status_code >= 400:
        session.cookies.clear()

    mask = os.umask(0o177)
    try:
        session.cookies.save(ignore_discard=True)
    finally:
        os.umask(mask)

    del auth
    del data
    del username
    password = '******'
    del password

    # Decode the response
    return response, session
Example #17
import quizduell
import cookielib
import json
import os
import argparse

parser = argparse.ArgumentParser(description='Give me a gameID!')
parser.add_argument("--username")
parser.add_argument("--password")

args = parser.parse_args()
username = args.username
password = args.password

# Load authenticated session from file to prevent unnecessary logins:
cookie_jar = cookielib.MozillaCookieJar('cookie_file')
api = quizduell.QuizduellApi(cookie_jar)

if os.access(cookie_jar.filename, os.F_OK):
    cookie_jar.load()
else:
    api.login_user(username, password)

api = quizduell.QuizduellApi(cookie_jar)
result = api.current_user_games()

if 'access' in result:
    # Session invalid, re-login:
    api.login_user(username, password)
    result = api.top_list_rating()
Example #18
if len(sys.argv) < 3:  # reconstructed guard -- the original snippet began mid-block
    print('usage:python classlist.py [name] [student code]')
    sys.exit(0)
else:
    name = sys.argv[1]
    code = sys.argv[2]

baseurl = 'http://jxgl.hdu.edu.cn/xf_xsqxxxk.aspx?'

# init my opener
paramters = urllib.urlencode({
    'xh': code,
    'xm': name.decode('utf-8').encode('gbk'),
    'gnmkdm': 'N121113'
})
cookiename = 'cookie.dat'
cookie = cookielib.MozillaCookieJar(cookiename)
cookie.load(cookiename, ignore_discard=True, ignore_expires=True)
opener = getopener(cookie)

para_dct = {}
response = opener.open(baseurl + paramters)

temp_content = read(response)

viewstate = re.compile('id="__VIEWSTATE" value="(.*)"').search(
    temp_content).groups()[0]
eventvali = re.compile('id="__EVENTVALIDATION" value="(.*)"').search(
    temp_content).groups()[0]

para_dct['ddl_kcxz'] = ''
para_dct['ddl_ywyl'] = ''
Example #19
if __name__ == '__main__':
    action = sys.argv[8]
    uri = urllib2.urlparse.ParseResult(
            scheme=sys.argv[9],
            netloc=sys.argv[10],
            path=sys.argv[11],
            params='',
            query='',
            fragment='').geturl()
    set_cookie = sys.argv[12] if len(sys.argv)>12 else None

    if 'XDG_DATA_HOME' in os.environ.keys() and os.environ['XDG_DATA_HOME']:
        f = os.path.join(os.environ['XDG_DATA_HOME'],'reuzbl/cookies.txt')
    else:
        f = os.path.join(os.environ['HOME'],'.local/share/reuzbl/cookies.txt')
    jar = cookielib.MozillaCookieJar(f)

    try:
        jar.load(ignore_discard=True)
    except:
        pass

    req = urllib2.Request(uri)

    if action == 'GET':
        jar.add_cookie_header(req)
        if req.has_header('Cookie'):
            print req.get_header('Cookie')
    elif action == 'PUT':
        hdr = urllib2.httplib.HTTPMessage(StringIO.StringIO('Set-Cookie: %s' % set_cookie))
        res = urllib2.addinfourl(StringIO.StringIO(), hdr, req.get_full_url())
        jar.extract_cookies(res, req)  # pull the Set-Cookie header into the jar
        jar.save(ignore_discard=True)  # assumed completion; the snippet ended before this step
Example #20
def get_headers_from_response(
    url,
    post=None,
    headers=[[
        'User-Agent',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12'
    ]]):
    return_headers = []
    if (DEBUG == True):
        logger.info("[scrapertools.py] get_headers_from_response url=" + url)

    if post is not None:
        if (DEBUG == True): logger.info("[scrapertools.py] post=" + post)
    else:
        if (DEBUG == True): logger.info("[scrapertools.py] post=None")

    #  Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"),
                                  'cookies.dat')
    if (DEBUG == True):
        logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    import cookielib
    urlopen = urllib2.urlopen
    Request = urllib2.Request
    cj = cookielib.MozillaCookieJar()
    # This is a subclass of FileCookieJar
    # that has useful load and save methods

    if os.path.isfile(ficherocookies):
        if (DEBUG == True):
            logger.info("[scrapertools.py] Reading cookie file")
        # if we have a cookie file already saved
        # then load the cookies into the Cookie Jar
        try:
            cj.load(ficherocookies)
        except:
            if (DEBUG == True):
                logger.info(
                    "[scrapertools.py] Cookie file exists but is unreadable; deleting it"
                )
            os.remove(ficherocookies)

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj),
                                  NoRedirectHandler())
    urllib2.install_opener(opener)

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Trace the request
    if post is None:
        if (DEBUG == True): logger.info("[scrapertools.py] GET request")
    else:
        if (DEBUG == True): logger.info("[scrapertools.py] POST request")

    # Header array
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        if (DEBUG == True):
            logger.info("[scrapertools.py] header=%s" % str(header[0]))
        txheaders[header[0]] = header[1]
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")

    # Build the request
    req = Request(url, post, txheaders)
    handle = urlopen(req)

    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    #data=handle.read()
    info = handle.info()
    if (DEBUG == True): logger.info("[scrapertools.py] Response")
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    location_header = ""
    for header in info:
        if (DEBUG == True):
            logger.info("[scrapertools.py] " + header + "=" + info[header])
        return_headers.append([header, info[header]])
    handle.close()
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")

    # Elapsed time
    fin = time.clock()
    if (DEBUG == True):
        logger.info("[scrapertools.py] Downloaded in %d seconds " %
                    (fin - inicio + 1))

    return return_headers
Example #21
import urllib2
import cookielib
filename = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(filename)  # declare a MozillaCookieJar instance that saves cookies and writes them to file
handler = urllib2.HTTPCookieProcessor(cookie)  # create cookie processor using urllib2.HTTPCookieProcessor
opener = urllib2.build_opener(handler)  # build opener through handler
response = opener.open('http://www.zhihu.com')  # equal to the urllib2.urlopen method
cookie.save(ignore_discard=True, ignore_expires=True)
Example #22
def downloadpage(
    url,
    post=None,
    headers=[[
        'User-Agent',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12'
    ]],
    follow_redirects=True,
    timeout=socket.getdefaulttimeout()):
    if (DEBUG == True): logger.info("[scrapertools.py] downloadpage")
    if (DEBUG == True): logger.info("[scrapertools.py] url=" + url)

    if post is not None:
        if (DEBUG == True): logger.info("[scrapertools.py] post=" + post)
    else:
        if (DEBUG == True): logger.info("[scrapertools.py] post=None")

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    #  Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"),
                                  'cookies.dat')
    if (DEBUG == True):
        logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        if (DEBUG == True):
            logger.info("[scrapertools.py] Importing cookielib")
        import cookielib
    except ImportError:
        if (DEBUG == True):
            logger.info("[scrapertools.py] cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            if (DEBUG == True):
                logger.info("[scrapertools.py] Importing ClientCookie")
            import ClientCookie
        except ImportError:
            if (DEBUG == True):
                logger.info("[scrapertools.py] ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            if (DEBUG == True):
                logger.info("[scrapertools.py] ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()

    else:
        if (DEBUG == True):
            logger.info("[scrapertools.py] cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if (DEBUG == True): logger.info("[scrapertools.py] Cookies available")

        if os.path.isfile(ficherocookies):
            if (DEBUG == True):
                logger.info("[scrapertools.py] Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                if (DEBUG == True):
                    logger.info(
                        "[scrapertools.py] Cookie file exists but is unreadable; deleting it"
                    )
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            if (DEBUG == True):
                logger.info(
                    "[scrapertools.py] opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            if (DEBUG == True):
                logger.info("[scrapertools.py] opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, issue the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        if (DEBUG == True): logger.info("[scrapertools.py] GET request")
    else:
        if (DEBUG == True): logger.info("[scrapertools.py] POST request")

    # Add the headers
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        if (DEBUG == True):
            logger.info("[scrapertools.py] header %s=%s" %
                        (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available from Python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        deftimeout = socket.getdefaulttimeout()
        try:
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
        except:
            import sys
            for line in sys.exc_info():
                logger.error("%s" % line)
            raise  # without re-raising, 'handle' would be undefined below
        finally:
            socket.setdefaulttimeout(deftimeout)

    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    data = handle.read()
    info = handle.info()
    if (DEBUG == True): logger.info("[scrapertools.py] Response")
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    for header in info:
        if (DEBUG == True):
            logger.info("[scrapertools.py] " + header + "=" + info[header])
    handle.close()
    if (DEBUG == True):
        logger.info("[scrapertools.py] ---------------------------")
    '''
    # Issue the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry with special characters escaped
    except:
        req = urllib2.Request(url.replace(" ","%20"))

        # Add the headers
        for header in headers:
            req.add_header(header[0],header[1])

        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    if (DEBUG == True):
        logger.info("[scrapertools.py] Downloaded in %d seconds " %
                    (fin - inicio + 1))

    return data
Example #23
import cookielib
import urllib2

filename = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(filename)
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
response = opener.open("https://www.github.com")
cookie.save(ignore_discard=True, ignore_expires=True)
Example #24
def downloadpageGzip(url):

    #  Initialize the cookie library
    ficherocookies = os.path.join(config.get_data_path(), 'cookies.dat')
    if (DEBUG == True): logger.info("Cookiefile=" + ficherocookies)
    inicio = time.clock()

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                if (DEBUG == True):
                    logger.info(
                        "[scrapertools.py] Cookie file exists but is unreadable; deleting it"
                    )
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    #txheaders =  {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #              'Referer':'http://www.megavideo.com/?s=signup'}

    import httplib
    parsedurl = urlparse.urlparse(url)
    if (DEBUG == True): logger.info("parsedurl=" + str(parsedurl))

    txheaders = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding': 'gzip,deflate',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
        'Referer': parsedurl[0] + "://" + parsedurl[1]
    }
    if (DEBUG == True): logger.info(str(txheaders))

    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()

    fin = time.clock()
    if (DEBUG == True):
        logger.info(
            "[scrapertools.py] Downloaded 'Gzipped data' in %d seconds " %
            (fin - inicio + 1))

    # Decompress the gzipped data
    try:
        fin = inicio
        import StringIO
        compressedstream = StringIO.StringIO(data)
        import gzip
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        data1 = gzipper.read()
        gzipper.close()
        fin = time.clock()
        if (DEBUG == True):
            logger.info(
                "[scrapertools.py] 'Gzipped data' decompressed in %d seconds "
                % (fin - inicio + 1))
        return data1
    except:
        return data
Example #25
def read_body_and_headers(url,post=None,headers=[],follow_redirects=False,timeout=None):
    _log("read_body_and_headers "+url)
    if post is not None: _log("read_body_and_headers post="+post)
    if len(headers)==0: headers.append(["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"])
    # Start cookie lib
    ficherocookies=os.path.join(get_data_path(),'cookies.dat'); _log("read_body_and_headers cookies_file="+ficherocookies); cj=None; ClientCookie=None; cookielib=None
    try: _log("read_body_and_headers importing cookielib"); import cookielib # Let's see if cookielib is available
    except ImportError:
        _log("read_body_and_headers cookielib no disponible") # If importing cookielib fails # let's try ClientCookie
        try: _log("read_body_and_headers importing ClientCookie"); import ClientCookie
        except ImportError: _log("read_body_and_headers ClientCookie not available"); urlopen=urllib2.urlopen; Request=urllib2.Request # ClientCookie isn't available either
        else: _log("read_body_and_headers ClientCookie available"); urlopen=ClientCookie.urlopen; Request=ClientCookie.Request; cj=ClientCookie.MozillaCookieJar() # imported ClientCookie
    else:
        _log("read_body_and_headers cookielib available"); urlopen=urllib2.urlopen; Request=urllib2.Request; cj=cookielib.MozillaCookieJar() # importing cookielib worked
        # This is a subclass of FileCookieJar # that has useful load and save methods
    if cj is not None: # we successfully imported # one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")
        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            try: cj.load(ficherocookies) # if we have a cookie file already saved # then load the cookies into the Cookie Jar
            except: _log("read_body_and_headers Wrong cookie file, deleting..."); os.remove(ficherocookies)
        # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs
        if cookielib is not None:
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2
            if not follow_redirects: opener=urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj),NoRedirectHandler())
            else: opener=urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            _log("read_body_and_headers opener using ClientCookie")
            # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie
            opener=ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)); ClientCookie.install_opener(opener)
    # -------------------------------------------------
    # Cookies instaladas, lanza la petición
    # -------------------------------------------------
    inicio=time.clock() # Contador
    txheaders={} # Diccionario para las cabeceras
    if post is None: _log("read_body_and_headers GET request") # Construye el request
    else: _log("read_body_and_headers POST request")
    _log("read_body_and_headers ---------------------------") # Añade las cabeceras
    for header in headers: _log("read_body_and_headers header %s=%s" % (str(header[0]),str(header[1]))); txheaders[header[0]]=header[1]
    _log("read_body_and_headers ---------------------------"); req=Request(url,post,txheaders)
    if timeout is None: handle=urlopen(req)
    else:        
        #Disponible en python 2.6 en adelante --> handle = urlopen(req, timeout=timeout) #Para todas las versiones:
        try: import socket; deftimeout=socket.getdefaulttimeout(); socket.setdefaulttimeout(timeout); handle=urlopen(req); socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info(): _log( "%s" % line )
    cj.save(ficherocookies) # Actualiza el almacén de cookies
    # Lee los datos y cierra
    if handle.info().get('Content-Encoding')=='gzip': buf=StringIO(handle.read()); f=gzip.GzipFile(fileobj=buf); data=f.read()
    else: data=handle.read()
    info=handle.info(); _log("read_body_and_headers Response"); returnheaders=[]; _log("read_body_and_headers ---------------------------")
    for header in info: _log("read_body_and_headers "+header+"="+info[header]); returnheaders.append([header,info[header]])
    handle.close(); _log("read_body_and_headers ---------------------------")
    '''
    # Lanza la petición
    try: response = urllib2.urlopen(req)
    # Si falla la repite sustituyendo caracteres especiales
    except:
        req = urllib2.Request(url.replace(" ","%20"))
        # Añade las cabeceras
        for header in headers: req.add_header(header[0],header[1])
        response = urllib2.urlopen(req)
    '''
    # Tiempo transcurrido
    fin=time.clock(); _log("read_body_and_headers Downloaded in %d seconds " % (fin-inicio+1)); _log("read_body_and_headers body="+data); return data,returnheaders
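A usage sketch for the helper above (example.com stands in for a real site): it returns the decoded body plus the response headers as [name, value] pairs.

data, response_headers = read_body_and_headers(
    "http://www.example.com/",
    headers=[["Referer", "http://www.example.com/"]],
    timeout=10)
for name, value in response_headers:
    print name, "=", value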
Exemplo n.º 26
0
    def _set_cookie(self, fileName):
        cookie = cookielib.MozillaCookieJar()
        cookie.load(fileName, ignore_discard=True, ignore_expires=True)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        urllib2.install_opener(opener)
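_set_cookie() only loads an existing file; a hypothetical counterpart (a sketch, not part of the original class) shows how such a Mozilla-format cookie file can be produced in the first place:

    def _save_cookie(self, fileName, url):
        # Fetch a URL through a cookie-aware opener and persist the
        # received cookies, including session-only and expired ones,
        # so _set_cookie can load them later.
        cookie = cookielib.MozillaCookieJar(fileName)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        opener.open(url).read()
        cookie.save(ignore_discard=True, ignore_expires=True)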
Exemplo n.º 27
0
    110: 'Opera/9.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.01',
    111: 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
    112: 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    113: 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
}
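The numeric keys above index into a larger pool of User-Agent strings. A typical use (a sketch; user_agents stands for the full dict, which is not fully shown here) is picking one at random per request:

import random

def pick_user_agent(user_agents):
    # Choose a random User-Agent string from the index -> UA pool.
    return random.choice(user_agents.values())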

HOST = 'Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0'  # despite its name, this holds the default User-Agent string
#Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0
#HISTORYFILE = xbmc.translatePath(ptv.getAddonInfo('profile') + "history.xml")

#cj = cookielib.LWPCookieJar()
cj = cookielib.MozillaCookieJar()


class common:
    HOST = HOST
    HEADER = None

    def __init__(self):
        pass

    def html_special_chars(self, txt):
        txt = txt.replace('#038;', '')  # turns '&#038;' into a bare '&'
        txt = txt.replace('&#34;', '"')
        txt = txt.replace('&#39;', '\'')
        txt = txt.replace('&#8221;', '"')  # right double quotation mark
        txt = txt.replace('&#8222;', '"')  # low double quotation mark
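Replacing entities one at a time only covers the handful listed. A more general alternative from the Python 2 standard library (a sketch, not part of the original class) decodes named and numeric entities alike:

import HTMLParser

def unescape_entities(txt):
    # Decode both named (&amp;) and numeric (&#38;, &#x26;) HTML entities.
    return HTMLParser.HTMLParser().unescape(txt)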
Exemplo n.º 28
0
    def __init__(self,
                 host,
                 auth_function,
                 user_agent,
                 source,
                 host_override=None,
                 extra_headers=None,
                 save_cookies=False,
                 auth_tries=3,
                 account_type=None,
                 debug_data=True,
                 secure=True,
                 rpc_tries=3):
        """Creates a new HttpRpcServer.

    Args:
      host: The host to send requests to.
      auth_function: A function that takes no arguments and returns an
        (email, password) tuple when called. Will be called if authentication
        is required.
      user_agent: The user-agent string to send to the server. Specify None to
        omit the user-agent header.
      source: The source to specify in authentication requests.
      host_override: The host header to send to the server (defaults to host).
      extra_headers: A dict of extra headers to append to every request. Values
        supplied here will override other default headers that are supplied.
      save_cookies: If True, save the authentication cookies to local disk.
        If False, use an in-memory cookiejar instead.  Subclasses must
        implement this functionality.  Defaults to False.
      auth_tries: The number of times to attempt auth_function before failing.
      account_type: One of GOOGLE, HOSTED_OR_GOOGLE, or None for automatic.
      debug_data: Whether debugging output should include data contents.
      secure: Whether to use https (True, the default) or http for requests.
      rpc_tries: The number of rpc retries upon http server error (i.e.
        response code >= 500 and < 600) before failing.
    """
        if secure:
            self.scheme = "https"
        else:
            self.scheme = "http"
        self.host = host
        self.host_override = host_override
        self.auth_function = auth_function
        self.source = source
        self.authenticated = False
        self.auth_tries = auth_tries
        self.debug_data = debug_data
        self.rpc_tries = rpc_tries

        self.account_type = account_type
        self.read_credentials = False  # Have user credentials been read?
        self.username = None
        self.password = None

        # URL of the AppServer the app load balancer redirected to
        self.appserver_url = None
        # Last AppServer that was authenticated successfully
        self.last_appserver_ip = None

        self.extra_headers = {}
        if user_agent:
            self.extra_headers["User-Agent"] = user_agent
        if extra_headers:
            self.extra_headers.update(extra_headers)

        self.save_cookies = save_cookies
        self.cookie_jar = cookielib.MozillaCookieJar()
        self.opener = self._GetOpener()
        if self.host_override:
            logger.info("Server: %s; Host: %s", self.host, self.host_override)
        else:
            logger.info("Server: %s", self.host)

        if ((self.host_override and self.host_override == "localhost")
                or self.host == "localhost"
                or self.host.startswith("localhost:")):
            self._DevAppServerAuthenticate()
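_GetOpener() is referenced but not shown in this excerpt. A minimal sketch of what a urllib2-based implementation could look like (an assumption, not the actual App Engine code) wires the shared MozillaCookieJar into the opener:

    def _GetOpener(self):
        # Every RPC goes through this opener, so authentication cookies
        # collected in self.cookie_jar are reused automatically.
        return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))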
Exemplo n.º 29
0
from requests import Session
from robobrowser import RoboBrowser
import cookielib
import pdb

session = Session()
session.verify = False  # Skip SSL verification
cj = cookielib.MozillaCookieJar('cookies.txt')
cj.load()
browser = RoboBrowser(session=session)
## Instant Queue
browser.open("http://www.netflix.com/MyList", cookies=cj)

# get the form
queue_form = browser.get_form(id='MainQueueForm')
# queue_submit = queue_form.submit_fields['evoSubmit']

predictions = []
skip_keys = ["queueHeader"]
for key in queue_form.keys():
    if key in skip_keys:
        continue
    if 'OP' in key:
        continue
    spans = browser.find_all("input", {"name": key})[0].findAllNext("span")
    for s in spans:
        if s is not None:
            for c in s['class']:
                if 'sbmf-' in c:
                    predicted_rating = c[len("sbmf-"):]  # slice off the prefix (strip() removes characters, not a prefix)
                    if key not in (item[0] for item in predictions):
Exemplo n.º 30
0
            try:
                shutil.copyfile(post_review_cookies, cookie_file)
                os.chmod(cookie_file, 0600)
            except IOError, e:
                logging.warning("There was an error while copying "
                                "post-review's cookies: %s" % e)

    if not os.path.isfile(cookie_file):
        try:
            open(cookie_file, 'w').close()
            os.chmod(cookie_file, 0600)
        except IOError, e:
            logging.warning("There was an error while creating a "
                            "cookie file: %s" % e)

    return cookielib.MozillaCookieJar(cookie_file), cookie_file
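A usage sketch for the function above (its enclosing def is not shown; create_cookie_jar is a hypothetical stand-in for it): load the jar, tolerating the freshly created empty file, and install it in urllib2.

cookie_jar, cookie_file = create_cookie_jar()  # hypothetical name
try:
    cookie_jar.load(ignore_discard=True, ignore_expires=True)
except IOError:
    pass  # a brand-new empty file is not valid Mozilla format yet
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
urllib2.install_opener(opener)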


class ReviewBoardServer(object):
    """Represents a Review Board server we are communicating with.

    Provides methods for executing HTTP requests on a Review Board
    server's Web API.

    The ``auth_callback`` parameter can be used to specify a callable
    which will be called when authentication fails. This callable will
    be passed the realm and URL of the Review Board server, and should
    return a 2-tuple of (username, password). The user can be prompted
    for their credentials using this mechanism.
    """
    def __init__(self,
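A sketch of a callable satisfying the auth_callback contract described in the docstring above (interactive prompting is just one possible implementation):

import getpass

def prompt_credentials(realm, url):
    # Called when authentication fails; must return (username, password).
    print "Authentication required for %s (realm: %s)" % (url, realm)
    username = raw_input("Username: ")
    password = getpass.getpass("Password: ")
    return username, password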