Example #1
def EUN(Username, Password):
    #Logging in
    br = spynner.Browser()
    br.load("http://www.bits-pilani.ac.in:12349/Login.aspx")
    br.wk_fill('input[name="TextBox1"]', Username)
    br.wk_fill('input[name="TextBox2"]', Password)
    br.click("input[type=submit]",
             wait_load=True,
             wait_requests=None,
             timeout=None)

    # Password check
    if str(br.url) == "http://www.bits-pilani.ac.in:12349/Student/StudentHome.aspx":
        #Loading the arrears page
        br.load("http://www.bits-pilani.ac.in:12349/Student/Dues.aspx")
        br.click("input[name=Button2]",
                 wait_load=True,
                 wait_requests=None,
                 timeout=None)

        #Creating a soup object
        plain_text = str(br.html)
        soup = BeautifulSoup(plain_text)

        #Extraction of data
        tableData = soup.find("table", attrs={"id": "arrearGridView"})
        cells = tableData.findAll('td')
        #Printing the dues
        for item in cells[-1]:
            print "Your dues : " + item.string
    else:
        print "Entered Username and Password do not match"
    br.close()
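The snippet above assumes spynner and BeautifulSoup are imported at module level. A minimal, hedged calling sketch (the import style and the credentials are placeholders, not from the original):

import spynner
from BeautifulSoup import BeautifulSoup  # or "from bs4 import BeautifulSoup" on newer setups

EUN("f2012000", "secret")  # hypothetical credentials; prints the dues or a mismatch message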
Example #2
 def __init__(self, lrules, lua, lh, ljq):
     self.rules = lrules
     self.browser = spynner.Browser(user_agent=lua,
                                    ignore_ssl_errors=False,
                                    headers=lh)
     self.browser.load_jquery(ljq)
     self.browser.set_url_filter(self.url_filter_ext)
Example #3
    def open(self, url0):
        html_body = ""
        try:
            self.browser = spynner.Browser()
            #self.browser.show()

            #h_heads = [("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1")]
            #h_heads = [("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36")]
            # Load the page with a 120 s timeout
            #self.browser.load(url=url0, load_timeout=120, headers=h_heads)
            self.browser.load(url=url0, load_timeout=120)

            # Drag the page scrollbar to the bottom
            #js="var q=document.documentElement.scrollTop=10000"
            #self.browser.runjs(js)
            #self.browser.wait(15)
            #self.browser.wait_load(15)

            html_body = str(self.browser.html)
        except Exception:
            # failed to download the dynamic page
            html_body = ""
        finally:
            self.browser.close()

        return html_body
Example #4
 def process_request(self, request, spider):
     normal_id = spider.normal_id
     try:
         if type(request) is not FormRequest:
             browser = spynner.Browser()
             browser.create_webview()
             browser.set_html_parser(pyquery.PyQuery)
             browser.hide()
             browser.load(request.url, load_timeout=50, tries=3)
             html = browser.html
             html = html.encode('utf-8')
             body = str(html)
             return HtmlResponse(url=request.url, body=body)
     except spynner.SpynnerTimeout:
         print 'Timeout %s' % request.url
         self.col.update({'normal_id': normal_id},
                         {'$set': {'state': 'error'}})
     except Exception as e:
         print e.message
         self.col.update({'normal_id': normal_id},
                         {'$set': {'state': 'error'}})
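Because this process_request returns an HtmlResponse directly, Scrapy skips its default downloader for these requests. A hedged sketch of enabling such a middleware (the module path is an assumption for illustration):

# settings.py
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.SpynnerDownloaderMiddleware': 543,
}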
Example #5
    def __init__(self):
        # Browser object
        agent = comm.random_useragent.getRandomUAItem()
        # self.m_browser = spynner.Browser()
        self.m_browser = spynner.Browser(user_agent=agent)
        # self.m_browser.set_proxy("58.52.201.119:8080")
        self.m_browser.hide()
        # self.m_browser.show()

        # Create the database object
        self.db_oper = db_helper_class(conf.db_conf)
        # Page size
        self.page_size = 20
        # Read offset
        self.r_offset = 0

        # Current progress
        self.curr_prog = 0
        # Total count of dropped records
        self.drop_count = 0

        # Application: Kangaiduo Tmall full catalog product details
        #TODO:
        # self.app_id = conf.app_conf.app_tmall_all_products_detail
        # self.class_id = conf.class_conf.cls_tmall_all_products_detail
        self.app_id = 999
        self.class_id = 999
Example #6
    def __init__(self, num_placa):
        """
        Constructor
        """
        browser = spynner.Browser(
            user_agent='Mozilla/5.0 (X11; Linux i686; rv:15.0) Gecko/20100101 Firefox/15.0.1')
        browser.create_webview()

        # uppercase the plate number
        self.placa = num_placa.upper()

        self.headers = {
            'Host': 'soaprd.sbs.gob.ec:7778',
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Language': 'es-ES,es;q=0.8'
        }

        index_url = 'http://soaprd.sbs.gob.ec:7778/AppWGP/sbs_soat_index.jsp'

        browser.load(index_url)

        gen_principal_url = 'http://soaprd.sbs.gob.ec:7778/AppWGP/sbs_gen_principal.jsp'

        browser.load(gen_principal_url)

        # the starting URL
        self.url = 'http://soaprd.sbs.gob.ec:7778/AppWGP/sparametrosappgen'

        # GET variables to pass in the URL
        variables = "hid_codSoftware=110&hid_codReporte=6&hid_target=centroUP&txt_q_placa=%s" % self.placa

        datos_headers = dict(
            self.headers, **{
                'Origin': 'http://soaprd.sbs.gob.ec:7778',
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': 'http://soaprd.sbs.gob.ec:7778/AppSoat/sbs_soat_ctrConsulta.jsp?hid_codSoftware=110&hid_codReporte=6&hid_target=centroUP&hid_soporteExcel=S&COD_SOFTWARE=110&COD_UNIDAD_SUBUNIDAD=SRT&NOM_UNIDAD_SUBUNIDAD=SUBDIRECCION%20DE%20RECURSOS%20TECNOL%D3GICOS&COD_EMP=1747&PORTAL_USER=PCARGUA&STS_PERMITE_I=N&STS_PERMITE_D=N&STS_PERMITE_U=N&STS_PERMITE_S=S&COD_OPCION=393'
            })

        req = QNetworkRequest(QUrl(self.url))
        for k, v in datos_headers.items():
            req.setRawHeader(k, v)

        browser.webframe.load(req, QNetworkAccessManager.PostOperation,
                              variables)

        browser.wait_load()
        datos = unicode(browser.webframe.toHtml())
        browser.close()

        self.parse_data(datos)
Example #7
def main():
    browser = spynner.Browser()
    browser.load(url)
    #while 'ORIGIN' not in browser.html:
    browser.wait(3)
    #browser.wait_load(3)
    html = browser.html
    #web.store_content_in_file(html, '/tmp/spynner.html', overwrite=True)
    print len(html)
    browser.close()
Example #8
    def __init__(self, user_agent=None, **kwargs):
        try:
            import spynner
        except ImportError:
            raise DependencyNotInstalledError('spynner')

        if user_agent is None:
            user_agent = 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)'

        self.br = spynner.Browser(user_agent=user_agent, **kwargs)
Example #9
 def __init__(self):
     comm.PLog.Log("Running instance: Tmall prescription drug list")
     agent = comm.random_useragent.getRandomUAItem()
     self.browser = spynner.Browser(user_agent=agent)
     # Set a proxy
     # browser.set_proxy('http://219.133.31.120:8888')
     self.browser.hide()
     self.db_oper = comm.db_helper.db_helper_class(conf.db_conf)
     # Actual number of target links obtained
     self.target_link_cnt = 0
Example #10
    def get_spynner(self):
        if not self._spynner:
            try:
                import spynner
            except ImportError:
                raise SkipTest(
                    "Spynner must be installed if you want to use it")

            self._spynner = spynner.Browser()

        return self._spynner
Example #11
 def process_request(self, request, spider):
     browser = spynner.Browser()
     browser.create_webview()
     browser.set_html_parser(pyquery.PyQuery)
     browser.load(request.url, 20)
     try:
         browser.wait_load(10)
     except spynner.SpynnerTimeout:
         pass
     body = str(self.fixCharset(browser.html))
     browser.close()  # release the browser before handing back the response
     return HtmlResponse(request.url, body=body)
Example #12
    def getCookie(self):
        from time import sleep
        import re

        self.errmsg = ""
        tid = ""
        self.cookie = ""
        self.cookie_photo = ""

        browser = spynner.Browser(debug_level=spynner.DEBUG, debug_stream=debug_stream)

        try:
            ########  Log in  ########
            browser.load("http://www.cyworld.com/cymain/?f=cymain")
            browser.load_jquery(force=True)
            browser.fill('input[name="ID"]', self.email)
            browser.fill('input[name="PASSWD"]', self.passwd)
            
            if self.debug:  open("beforeClick.html","w").write(browser.html.encode("mbcs"))
            browser.click("input[name=btnLOGIN]")
            browser.wait(self.PAGE_LOAD_WAIT_TIME)
            
            if self.debug:  open("afterLogin.html","w").write(browser.html.encode("mbcs"))

            # Check whether the login failed
            if self.email not in browser.html:
                self.errmsg = "The email or password does not match. Please check and try again."
                return
            elif self.debug:
                print "cookcook.getCookie: no email/password error"
            
            c = browser.get_cookies()
            self.tid = self.getTidFromCookie(c)
            if self.debug:  open("cookie.txt", "w").write(c)
            
            #===============================================================
            #self.tid = "21251087"
            #===============================================================

            # Load the photo album board, then grab its cookie
            browser.load("http://minihp.cyworld.com/svcs/MiniHp.cy/index/%s?tid=%s&urlstr=phot" % (self.tid, self.tid))
            browser.wait(self.PAGE_LOAD_WAIT_TIME)
            c = browser.get_cookies()
            self.cookie_photo = self.make_cookie_photo(c)
            if self.debug:  open("cookie_photo.txt", "w").write(self.cookie_photo)
            
            
        except Exception, msg:
            if self.debug:
                print "Exception: ", msg
                traceback.print_exc(file=sys.stdout)
            self.errmsg = "There was a problem logging in. Check your Internet connection and your login email and password."
            return
Example #13
def gcta_spider(cancertype, tierclass):
	browser = spynner.Browser()
	#browser.show()
	browser.hide()

	try:
		browser.load(url='http://54.84.12.177/PanCanFusV2/Fusions!cancerType')
	except spynner.SpynnerTimeout:
		print 'Timeout.'
	else:
		# Enter the search criteria
		# browser.wk_fill('select[id="cancerType"]', 'BRCA')
		browser.wk_select('[id="cancerType"]', cancertype)

		# browser.wk_fill('select[id="tier"]', 'tier1')
		browser.wk_select('[id="tier"]', tierclass)

		# Click the search button and wait for the page to load
		browser.wk_click('input[type="submit"]', wait_load=True)

		# Get the page HTML
		html = browser.html

		# get total pages
		pageNum = getNumOfPagesFromHtml(html)

		fusionGenePairs = []

		# first page
		if pageNum > 0:
			p = 1
			print 'processing page %d of %d' % (p, pageNum)
			fusionGenePairs = extractFusionGenePairsFromHtml(html)
		

		# second to last page
		if pageNum > 1:
			for i in xrange(1, pageNum):
				try:
					browser.wk_click('[id="fusions_next"]')
					html = browser.html
					tmp = extractFusionGenePairsFromHtml(html)
					fusionGenePairs.extend(tmp)
					tmp = []
					p = i + 1
					print 'processing page %d of %d' % (p, pageNum)
				except Exception:
					print 'failed to click next page'
					break
	browser.close()
	return fusionGenePairs
Example #14
 def process_request(self, request, spider):
     browser = spynner.Browser()
     browser.create_webview()
     browser.set_html_parser(pyquery.PyQuery)
     browser.load(request.url, 20)
     try:
         browser.wait_load(10)
     except spynner.SpynnerTimeout:
         pass
     body = str(browser.html.encode('utf-8'))
     browser.close()  # release the browser before handing back the response
     return HtmlResponse(request.url, body=body)
Example #15
    def __init__(self):
        self.m_browser = spynner.Browser()
        self.m_browser.hide()

        # Database object
        self.db_oper = db_helper_class(conf.db_conf)
        # Page size
        self.page_size = 20
        # Read offset
        self.r_offset = 0

        self.app_id = conf.app_conf.app_360haoyao_id
        self.class_id = conf.class_conf.class_360haoyao_all
Example #16
 def process_request(self, request, spider):
     if request.url.find("nuomi") != -1:
         browser = spynner.Browser()
         browser.create_webview()
         browser.set_html_parser(pyquery.PyQuery)
         # browser.load(request.url, 20)
         browser.load(url=request.url, load_timeout=120, tries=1)
         try:
             browser.wait_load(1)
         except spynner.SpynnerTimeout:
             pass
         body = str(browser.webframe.toHtml().toUtf8())
         browser.close()  # release the browser before handing back the response
         return HtmlResponse(request.url, body=body)
Example #17
    def run(self):
        while True:
            n, site = self.queue.get()
            url = site.strip()
            result_path = url
            result_path = result_path.split("/")[-1]
            zip_path = result_path + ".zip"
            zip_file = os.path.join(out_dir, zip_path)
            result_path = result_path + ".html"
            result_file = os.path.join(out_dir, result_path)
            if os.path.exists(zip_file):
                self.out_queue.put((zip_file, 0))
                self.queue.task_done()
                continue

            # creating and closing browser is wasteful but guarantees no
            # memory issues
            browser = spynner.Browser()
            browser.create_webview(True)
            try:
                browser.load(url, load_timeout=20)
            except spynner.browser.SpynnerTimeout:
                print("Load timeout reading %i, %s" % (n, url))

            print("Scraping %i, %s" % (n, url))
            try:
                f = open(result_file, mode="w")
                read = browser._get_html()
                f.writelines(read)
                f.close()

                zf = zipfile.ZipFile(zip_file, mode='w')
                zf.write(result_file, compress_type=compression)
                os.remove(result_file)
                zf.close()
                self.out_queue.put((zip_file, 0))

            except TypeError:
                print("Error reading %i, %s" % (n, url))
                self.out_queue.put((zip_file, 2))
            except spynner.browser.SpynnerTimeout:
                print("Timeout reading %i, %s" % (n, url))
                self.out_queue.put((zip_file, 3))
            except Exception:
                print("Unknown error reading %i, %s" % (n, url))
                self.out_queue.put((zip_file, 4))
            browser.close()
            del browser
            self.queue.task_done()
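The comment above trades startup cost for predictable memory by creating a fresh browser per URL. A hedged sketch of the same idea as a context manager, so the close runs even when scraping raises (names are illustrative, not from the original):

import spynner
from contextlib import contextmanager

@contextmanager
def throwaway_browser(**kwargs):
    # One short-lived Browser per use; always closed, even if the body raises.
    browser = spynner.Browser(**kwargs)
    try:
        yield browser
    finally:
        browser.close()

# with throwaway_browser() as br:
#     br.load(url, load_timeout=20)
#     html = br._get_html()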
Example #18
 def process_request(self, request, spider):
     #        if spider.name in settings.WEBKIT_DOWNLOADER:
     #            if( type(request) is not FormRequest ):
     browser = spynner.Browser()
     browser.create_webview()
     browser.set_html_parser(pyquery.PyQuery)
     browser.load(request.url, 20)
      try:
          browser.wait_load(10)
      except spynner.SpynnerTimeout:
          pass
      body = str(browser.html.encode('utf-8'))
      browser.close()  # release the browser before handing back the response
      return HtmlResponse(request.url, body=body)
Example #19
def get_gzh_url(gzh_name):
	browser = spynner.Browser()
	browser.show()
	wurl = unicode('http://weixin.sogou.com/weixin?type=1&query={}&ie=utf8&_sug_=n&_sug_type_='.format(gzh_name))
	gzh_url = None  # avoid a NameError when the load times out
	try:
		browser.load(url=wurl)
	except spynner.SpynnerTimeout:
		print 'Timeout.'
	else:
		html = browser.html
		soup = BeautifulSoup(html)
		info = soup.find(id="sogou_vr_11002301_box_0")
		gzh_url = info.get('href')
	browser.close()  # close the browser on both paths
	return gzh_url
Example #20
def loadPage(url):
    # get browser object
    #browser = spynner.Browser(debug_level = spynner.DEBUG)
    browser = spynner.Browser()

    # create browser window
    browser.create_webview()
    browser.show()

    # load login page
    try:
        browser.load_jquery(True)
        browser.load(url)
        return browser
    except spynner.SpynnerTimeout:
        print("could not load page")
        return
Example #21
    def process_request(self, request, spider):
        browser = spynner.Browser()
        #if 'Cookie' in request.headers.keys():
        #  browser.set_cookies(request.headers.Cookie)
        browser.create_webview()
        browser.set_html_parser(pyquery.PyQuery)
        browser.load(request.url, 300)
        try:
            browser.wait_load(10)
        except spynner.SpynnerTimeout:
            pass

        string = browser.html.encode('utf-8')
        renderedBody = str(string)
        browser.close()
        #return HtmlResponse(request.url,Cookies=browser.cookies,body=renderedBody)
        return HtmlResponse(request.url, body=renderedBody)
Example #22
    def run(self):
        url = os.path.join(common.URL, "_cnc/channelclient")

        browser = spynner.Browser()
        browser.create_webview(True)
        browser.load(url, load_timeout=30, tries=1)
        browser.set_javascript_prompt_callback(self._message_listener)
        while not self.kill:
            if self.token and not self.connected and self.connecting:
                browser.runjs("openChannel('" + self.token + "')")
                self.connected = True
                self.connecting = False
            if self.token is None and self.connected:
                browser.runjs("closeSocket()")
            browser.wait(1)

        browser.close()
Example #23
 def parse(self, response):
     try:
         browser = spynner.Browser()
         browser.show()
         try:
             browser.load(response.url, load_timeout=60, tries=3)  # the login page
         except spynner.SpynnerTimeout:
             print 'download %s timeout' % response.url
             self.col.update({'vendor': self.vendor},
                             {'$set': {'state': 'error'}})
         else:
             browser.wk_fill('input[id="modlgn_username"]', 'lowseasonwind')  # fill in the username and password
             browser.wk_fill('input[id="modlgn_passwd"]', 'zhuimeng7')
             browser.wait(3)
             browser.runjs("document.getElementById('form-login').submit();")  # submit the login form
             browser.wait(5)
             try:
                 # after logging in, load the software download page
                 browser.load('http://www.kingview.com/downloads/software.html')
             except spynner.SpynnerTimeout:
                 print 'download %s timeout' % 'http://www.kingview.com/downloads/software.html'
                 self.col.update({'vendor': self.vendor},
                                 {'$set': {'state': 'error'}})
             else:
                 print 'goto software page %s' % browser.url
                 body = browser.html
                 body = str(body)
                 return self.parse_item(
                     HtmlResponse(
                         url='http://www.kingview.com/downloads/software.html',
                         body=body))
             # must use return here, not yield, or an error is raised; also line 477 of spynner's browser.py must be patched, or the output is garbled
     except Exception as e:
         self.col.update({'vendor': self.vendor},
                         {'$set': {'state': 'error'}})
Example #24
def get_articles_url(gzh_url):
	a_urls = []
	base_url = "http://mp.weixin.qq.com"
	browser = spynner.Browser()
	browser.show()
	try:
		browser.load(url=gzh_url)
	except spynner.SpynnerTimeout:
		print 'Timeout.'
	else:
		html = browser.html
		soup = BeautifulSoup(html)
		for link in soup.findAll("h4"):
			f_url = base_url + link.get('hrefs')
			a_urls.append(f_url)
	browser.close()  # close the browser on both paths
	return a_urls
Example #25
 def load_html(self):
     """
     load html using spynner
     """
     browser = spynner.Browser()
     browser.hide()
     # browser.show()
     try:
         browser.load(self.url, load_timeout=300)
         browser.wait(self.wait)
         html = browser.html
     except spynner.SpynnerTimeout:
         html = None
     browser.close()
     return html
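A hedged usage sketch for load_html, assuming the owning class exposes the url and wait attributes the method reads (the class name is hypothetical):

# loader = PageLoader()                  # hypothetical owner class
# loader.url, loader.wait = 'http://example.com', 3
# html = loader.load_html()              # HTML string, or None on timeout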
Example #26
def main():
    try:
        br = spynner.Browser()
        status, list_of_versions = get_app_versions(557137623, br)  # Angry Birds Star Wars
        #status, list_of_versions = get_app_versions(284882215, br) # Facebook
        #status, list_of_versions = get_app_versions(310633997, br) # Whatsapp
        if list_of_versions:
            for version in list_of_versions:
                print '_id:', version['_id']
                print 'App ID:', version['app_id']
                print 'Date:', version['date']
                print 'Unixtime:', version['unixtime']
                print 'Number:', version['number']
                for update in version['updates']:
                    print '-', update
                print ''
            # End of for loop.
        # End of if statement.
    finally:
        br.close()
Example #27
    def __init__(self):
        # Browser object
        agent = comm.random_useragent.getRandomUAItem()
        # self.m_browser = spynner.Browser()
        self.m_browser = spynner.Browser(user_agent=agent)
        self.m_browser.hide()
        # self.m_browser.show()

        self.db_oper = db_helper_class(conf.db_conf)
        # Page size
        self.page_size = 20
        # Read offset
        self.r_offset = 0

        # Current progress
        self.curr_prog = 0
        # Total count of dropped records
        self.drop_count = 0

        # Application: Tmall health product details
        self.app_id = conf.app_conf.app_tmall_health_prods_detail
        self.class_id = conf.class_conf.cls_tmall_health_prods_detail
Example #28
def browse(url, spynner_browser_timeout, proxy=None):
    """
    Load a URL in a simulated browser and return the HTML source.
    :param url: the URL to load
    :param spynner_browser_timeout: load timeout in seconds
    :param proxy: optional proxy address
    :return: the page HTML
    """

    urlpret = urlparse.urlparse(url)

    browser = spynner.Browser(
                    user_agent="Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",
                    headers=[("Referer", '%s://%s' % (urlpret.scheme, urlpret.netloc))])
    if proxy is not None:
        browser.set_proxy(proxy)

    browser.load(url=url, load_timeout=spynner_browser_timeout)
    html = browser.html
    browser.close()
    return html
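A hedged usage sketch for browse (the URL, timeout, and proxy values are illustrative):

html = browse('http://example.com/page', 60)
html_via_proxy = browse('http://example.com/page', 60, proxy='127.0.0.1:8080')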
Example #29
    def __init__(self):
        # Browser object
        agent = comm.random_useragent.getRandomUAItem()
        # self.m_browser = spynner.Browser()
        self.m_browser = spynner.Browser(user_agent=agent)
        self.m_browser.hide()
        # self.m_browser.show()

        # Database object
        self.m_db_obj = comm.db_helper.db_helper_class(conf.db_conf)

        # Clear any previous session
        self.clear_session()

        # Job identifiers
        #TODO:
        # self.app_id = conf.app_conf.app_tmall_all_products_list
        # self.class_id = conf.class_conf.cls_tmall_all_products_list
        self.app_id = 999
        self.class_id = 999
        self.job_id = "%s_%s_%s" % (time.strftime('T%Y%m%d%H%M'), self.app_id,
                                    self.class_id)
        self.job_id = "T201612311000_106_800010"
Example #30
 def test(self):
     IMG = self.img
     URL = self.url
     assert self.proxyg is not None, "no global proxy set"
     assert self.proxyd is not None, "no download proxy set"
     br = self.browser = spynner.Browser(ignore_ssl_errors=False,
                                         user_agent=self.user_agent,
                                         debug_level=spynner.WARNING,
                                         debug_stream=sys.stderr)
     br.show()
     data, content = {}, {}
     # no proxy
     data['noproxy'] = br.download(IMG)
     br.load(URL, None)
     content['noproxy'] = br.html
     # no proxy - alt1
     br.set_proxy("")
     data["proxy_void"] = br.download(IMG)
     br.load(URL, None)
     content["proxy_void"] = br.html
     # no proxy - alt2
     br.set_proxy(None)
     data["proxy_none"] = br.download(IMG)
     br.load(URL, None)
     content["proxy_none"] = br.html
     # global proxy
     br.set_proxy(self.proxyg)
     data["proxy_g"] = br.download(IMG)
     br.load(URL, None)
     content["proxy_g"] = br.html
     # use a proxy only @ download level
     br.load(URL)
     data["proxy_d"] = br.download(IMG, proxy_url=self.proxyd)
     for i in data:
         if data["noproxy"] != data[i]:
             raise Exception("Download failed for %s" % i)