def _login(self, username, password):
    """Submit the login form and take over an already running session.

    @param username: value sent in the form's 'email' field
    @param password: value sent in the form's 'password' field
    """
    credentials = stoolkit.parse_form(self.getdoc(self.login_url))
    credentials['email'] = username
    credentials['password'] = password
    landing = self.getdoc(self.login_url, urlencode(credentials))

    # Only when the result page shows the "Sessie overschrijven" (overwrite
    # session) heading is a second, confirming post needed.
    if "<b>Sessie overschrijven</b>" not in html.tostring(landing):
        return

    confirmation = stoolkit.parse_form(landing)
    confirmation['overwrite'] = 'on'
    self.open("http://mgl.x-cago.net/session.do", urlencode(confirmation))
def _login(self, username, password):
    """Log in on the web page and replay two session cookies by hand.

    The login form is posted once to obtain the ``Set-Cookie`` header. The
    MACHINEID and TAUID ``name=value`` pairs are cut out of that header,
    appended to the opener as a fixed ``Cookie`` header, and the same form
    is then posted a second time.

    @param username: username to log in with
    @param password: password
    """
    url = LOGIN_URL.format(paper=self.paper)
    form = toolkit.parse_form(self.getdoc(url))
    form["username"] = str(username)
    form["password"] = str(password)

    response = self.open(url, urlencode(form))
    cookies = response.info()["Set-Cookie"]

    # TAUID is taken from its last occurrence (rfind), MACHINEID from its
    # first (find); each pair runs up to the next ';'.
    tauid_start = cookies.rfind("TAUID")
    tauid = cookies[tauid_start:cookies.find(";", tauid_start)]
    machineid_start = cookies.find("MACHINEID")
    machineid = cookies[machineid_start:cookies.find(";", machineid_start)]

    self.opener.opener.addheaders.append(
        ("Cookie", machineid + "; " + tauid))

    # Second post, now sent with the explicit cookie header.
    self.open(url, urlencode(form))
def _login(self, username, password, retry=False):
    """Log in through the web form, then authenticate over the AMF channel.

    The ``retry`` argument is accepted but not used in this method.

    @raise ValueError: if the URL reached after posting the form does not
        contain 'ticket'
    """
    # Newest entry of the latest-papers mapping: the one with the highest key
    editions = self._get_latest()
    newest = editions[sorted(editions.keys())[-1]]
    paper_id = int(newest['paperId'])

    url = LOGINURL.format(
        paper_id=paper_id, regio_code=self.context_id, domain=self.domain)

    log.info("Logging in..")
    form = toolkit.parse_form(self.getdoc(url))
    form['username'] = username
    form['password'] = password
    landing = self.open(url, urllib.urlencode(form))

    # The URL we end up on is kept as the ticket url
    self.ticket_url = landing.geturl()
    if 'ticket' not in self.ticket_url:
        log.error(CHECK_CREDENTIALS)
        raise ValueError(CHECK_CREDENTIALS)

    # Handshake with the server and adopt the headers it answers with
    handshake = self.create_message(messaging.CommandMessage, operation=5)
    envelope = self.create_envelope(self.create_request(handshake))
    reply = self.apiget(envelope).bodies[0][1]
    self.headers.update(reply.body.headers)

    # Build the AMF auth ticket.
    # NOTE(review): this uses self.paper_id, not the paper_id computed
    # above -- confirm that is intended.
    ticket = "TICKET_%s:%s%s" % (
        self.paper_id,
        AUTHURL.format(domain=self.domain),
        quote(self.ticket_url, '&'),
    )
    ticket = ticket.replace('ticket%3D', 'ticket=').replace('&zone', '%26zone')

    auth = self.create_message(messaging.CommandMessage, operation=8)
    auth.destination = 'auth'
    auth.body = base64.b64encode(ticket)
    auth.headers["DSEndpoint"] = "_IPaperOnlineServiceLocator_AMFChannel1"
    auth.correlationId = ""
    self.apiget(self.create_envelope(self.create_request(auth)))
    log.info("Logged in")
def _login(self, username, password):
    """Open the index page, then fill in and post the '#command' login form.

    @param username: value for the form's 'username' field
    @param password: value for the form's 'password' field
    """
    self.open(self.index_url)

    login_doc = self.getdoc(self.login_url)
    fields = parse_form(login_doc.cssselect("#command")[0])
    fields['username'] = username
    fields['password'] = password

    self.open(self.login_url, urlencode(fields))
def get_pages(self):
    """Yield page 0, then pages 1 up to the number read from the pager.

    The search form is parsed once from the index page and reused for every
    ``getpage`` call.
    """
    index_doc = self.getdoc(self.index_url)
    search_form = parse_form(index_doc.cssselect("#globalesucheContainer")[0])

    first = self.getpage(0, search_form)
    yield first

    # The second-to-last pager link carries the highest page number.
    # NOTE(review): this raises IndexError when the pager has fewer than two
    # links, and pages 0..n are requested (n + 1 pages) -- confirm both
    # against the site.
    pager_links = first.cssselect("a.pager-pagenr")
    last_number = int(pager_links[-2].text)

    page_no = 1
    while page_no <= last_number:
        yield self.getpage(page_no, search_form)
        page_no += 1
def _login(self, username, password):
    """Fill in the form found at LOGIN_URL and post it via the raw opener.

    @param username: value for the form's 'username' field
    @param password: value for the form's 'password' field
    """
    fields = stoolkit.parse_form(self.getdoc(LOGIN_URL))
    fields['username'] = username
    fields['password'] = password

    # Posted through the wrapped opener directly, not through self.open
    raw_opener = self.opener.opener
    raw_opener.open(LOGIN_URL, urlencode(fields))
def _login(self, username, password):
    """Post the login form and print the page's error cell, if there is one.

    A reported error is only written to stdout; nothing is raised.

    @param username: value for the form's 'email' field
    @param password: value for the form's 'password' field
    """
    fields = toolkit.parse_form(self.getdoc(LOGIN_URL))
    fields['email'] = username
    fields['password'] = password
    result = self.getdoc(LOGIN_URL, urlencode(fields))

    failures = result.cssselect("td.error")
    if not failures:
        return
    # Text of the first error cell, framed by blank lines
    print("\n{error[0].text}\n".format(error=failures))
def _login(self, username, password, retry=False):
    """Post the login form, then hand the resulting ticket to the AMF server.

    ``retry`` is part of the signature but is not read here.

    @raise ValueError: when the post-login URL lacks 'ticket'
    """
    # Latest paper id: taken from the entry under the highest key
    papers = self._get_latest()
    last_key = sorted(papers.keys())[-1]
    paper_id = int(papers[last_key]['paperId'])

    url = LOGINURL.format(paper_id=paper_id,
                          regio_code=self.context_id,
                          domain=self.domain)

    log.info("Logging in..")
    login_doc = self.getdoc(url)
    fields = toolkit.parse_form(login_doc)
    fields['username'] = username
    fields['password'] = password
    response = self.open(url, urllib.urlencode(fields))

    # Remember where the login post ended up; that URL carries the ticket
    self.ticket_url = response.geturl()
    if 'ticket' not in self.ticket_url:
        log.error(CHECK_CREDENTIALS)
        raise ValueError(CHECK_CREDENTIALS)

    # Server handshake (operation 5); its response headers are merged into ours
    ping = self.create_message(messaging.CommandMessage, operation=5)
    ping_request = self.create_request(ping)
    ping_reply = self.apiget(self.create_envelope(ping_request))
    self.headers.update(ping_reply.bodies[0][1].body.headers)

    # AMF auth message (operation 8).
    # NOTE(review): the ticket is built from self.paper_id while the login
    # URL uses the local paper_id -- verify these are meant to differ.
    ticket = "TICKET_%s:%s%s" % (self.paper_id,
                                 AUTHURL.format(domain=self.domain),
                                 quote(self.ticket_url, '&'))
    for old, new in (('ticket%3D', 'ticket='), ('&zone', '%26zone')):
        ticket = ticket.replace(old, new)

    login_msg = self.create_message(messaging.CommandMessage, operation=8)
    login_msg.destination = 'auth'
    login_msg.body = base64.b64encode(ticket)
    login_msg.headers["DSEndpoint"] = "_IPaperOnlineServiceLocator_AMFChannel1"
    login_msg.correlationId = ""
    login_request = self.create_request(login_msg)
    self.apiget(self.create_envelope(login_request))
    log.info("Logged in")
def _login(self, username, password):
    """Log in through the epaper login box and check for a user cookie.

    @param username: value for the form's 'name' field
    @param password: value for the form's 'pass' field
    @raise ValueError: if the cookie jar's text does not mention "user_name"
        after the login post
    """
    login_url = "http://www.telegraaf.nl/wuz/loginbox/epaper?nocache"
    self.open(self.week_url)

    login_form = self.getdoc(login_url).cssselect("#user-login")[0]
    fields = parse_form(login_form)
    fields['name'] = username
    fields['pass'] = password
    fields['rhash'] = "f8ac71adde5cdb382ab5e485a8c3447210a6b69b"
    fields['redir'] = self.week_url

    # Extra request headers are added to the opener before posting
    self.opener.opener.addheaders += [
        ("Host", "www.telegraaf.nl"),
        ("Referer", login_url),
    ]
    self.open(login_url, urlencode(fields))

    if "user_name" not in str(self.opener.cookiejar):
        raise ValueError("wrong user/pass")
def _login(self, username, password):
    """Post the SSO login form and require an HTTP 200 answer.

    The form is read from LOGIN_URL, filled in for the date in
    ``self.options['date']``.

    @param username: value for the 'sso:field:username' field
    @param password: value for the 'sso:field:password' field
    @raise ValueError: if the response code is not 200
    """
    date = self.options['date']
    l_url = LOGIN_URL.format(y=date.year, m=date.month, d=date.day)

    fields = toolkit.parse_form(self.getdoc(l_url))
    fields["sso:field:username"] = username
    fields["sso:field:password"] = password

    # NOTE(review): `other_url` is not defined inside this method; unless it
    # is a module-level name this line raises NameError -- confirm whether
    # the form should be posted to `l_url` instead.
    response = self.open(other_url, urlencode(fields))

    status = response.getcode()
    if status != 200:
        raise ValueError(CREDENTIALS_ERR % status)
def _login(self, username, password):
    """Login method for DBScraper; verifies the JSON status of the reply.

    @param username: self.options['username']
    @param password: self.options['password']
    @raise ValueError: if the decoded reply's 'status' is not "ok"
    """
    # Get initial cookies like sesid
    self.open(self.index_url)

    # Take the login parameters from the site's own form and POST them back
    login_form = self.getdoc(self.login_url).cssselect("form")[0]
    form = parse_form(login_form)
    form['email'] = username
    form['password'] = password
    reply = self.open(self.login_url, urlencode(form))

    # json.loads() needs the reply's text, not a response object: read the
    # body when the reply has one, and pass an already-plain string through.
    raw = reply.read() if hasattr(reply, "read") else reply
    if isinstance(raw, bytes):
        raw = raw.decode("utf-8")

    # If possible check the response; this saves trouble later on
    response = json.loads(raw)
    if response['status'] != "ok":
        raise ValueError("login status returned not OK but {}".format(response))
def _login(self, username, password):
    """Log in on dev.twitter.com and build a tweepy OAuth handler.

    Requires the username/password of a twitter account with a registered
    application in dev.twitter.com. The consumer key/secret and access
    token key/secret are scraped from the first listed application whose
    oauth page yields all four values; the handler is stored as
    ``self.auth``.

    @param username: value for the login form's 'name' field
    @param password: value for the login form's 'pass' field
    @raise Exception: if no application yields a consumer key
    """
    login_url = "https://dev.twitter.com/user/login"
    form = parse_form(self.getdoc(login_url))
    form['name'] = username
    form['pass'] = password
    self.open(login_url, urlencode(form))

    appsurl = "https://dev.twitter.com/apps"
    appsdoc = self.getdoc(appsurl)

    field_ids = (
        "#edit-consumer-key",
        "#edit-consumer-secret",
        "#edit-access-token",
        "#edit-access-token-secret",
    )
    credentials = None
    for app in appsdoc.cssselect("#content-main ul.apps-list li"):
        try:
            href = app.cssselect("a")[0].get('href')
            authhref = "/".join(href.split("/")[:-1]) + "/oauth"
            authdoc = self.getdoc(urljoin(appsurl, authhref))
            found = [authdoc.cssselect(field_id)[0].get('value')
                     for field_id in field_ids]
        except (IndexError, HTTPError):
            # Best effort: this application is unusable, try the next one.
            # Nothing is kept from it, so a half-scraped application can
            # never supply the key without its matching secrets.
            continue
        credentials = found
        break

    if not credentials or not credentials[0]:
        raise Exception("consumer key at {} not found".format(appsurl))

    c_k, c_s, a_t, a_t_s = credentials
    auth = tweepy.OAuthHandler(c_k, c_s)
    auth.set_access_token(a_t, a_t_s)
    self.auth = auth
def _login(self, username, password):
    """Log in with a 'service' target and verify that cookies were set.

    @param username: value for the form's 'username' field
    @param password: value for the form's 'password' field
    @return: True when the login went through
    @raise ValueError: if the page reports a wrong username/password
    @raise RuntimeError: if no ".nrc.nl" entry is in the cookie jar afterwards
    """
    # We need a 'service' param; it makes the page generate a cookie that we
    # need to access the data.
    d = self.options.get("date")
    month_minus = d.month - 1
    # The index_url template is filled from the local names in scope here
    # (d, month_minus, ...), so they keep their names until format() has run.
    self.index_url = self.index_url.format(**locals())

    fields = stoolkit.parse_form(self.getdoc(self.login_url))
    fields["username"] = username
    fields["password"] = password
    fields["service"] = self.index_url

    reply = self.opener.opener.open(self.login_url, urlencode(fields))
    body = reply.read().decode("utf-8")

    if "De opgegeven gebruikersnaam en/of wachtwoord zijn onjuist" in body:
        raise ValueError("Incorrect username/password")
    if ".nrc.nl" not in self.opener.cookiejar._cookies:
        raise RuntimeError("No cookies from login, something went wrong")
    return True
def _login(self, username, password):
    """Login method for DBScraper.

    Posts the site's login form and checks the 'status' field of the JSON
    document sent back.

    @param username: self.options['username']
    @param password: self.options['password']
    @raise ValueError: if the returned status is anything other than "ok"
    """
    # Get initial cookies like sesid
    self.open(self.index_url)

    # Get the login params from the website's form and encode them into a
    # POST request
    login_form = self.getdoc(self.login_url).cssselect("form")[0]
    form = parse_form(login_form)
    form['email'] = username
    form['password'] = password
    result = self.open(self.login_url, urlencode(form))

    # json.loads() takes text, so a response object has to be read first;
    # a result that already is a string is decoded as-is.
    text = result.read() if hasattr(result, "read") else result
    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # If possible check the response, saves future trouble
    response = json.loads(text)
    if response['status'] != "ok":
        raise ValueError(
            "login status returned not OK but {}".format(response))
def _login(self, username, password):
    """Scrape OAuth credentials from dev.twitter.com and store a handler.

    Requires the username/password of a twitter account with a registered
    application in dev.twitter.com. The consumer key/secret and access
    token key/secret are read from an application's oauth page and used to
    build the tweepy handler kept in ``self.auth``.

    @param username: value for the login form's 'name' field
    @param password: value for the login form's 'pass' field
    @raise Exception: if no consumer key could be found
    """
    login_url = "https://dev.twitter.com/user/login"
    doc = self.getdoc(login_url)
    form = parse_form(doc)
    form['name'] = username
    form['pass'] = password
    self.open(login_url, urlencode(form))

    appsurl = "https://dev.twitter.com/apps"
    appsdoc = self.getdoc(appsurl)

    def scrape(app):
        """Return (key, secret, token, token_secret) for one listed app."""
        href = app.cssselect("a")[0].get('href')
        authhref = "/".join(href.split("/")[:-1]) + "/oauth"
        authdoc = self.getdoc(urljoin(appsurl, authhref))
        return (
            authdoc.cssselect("#edit-consumer-key")[0].get('value'),
            authdoc.cssselect("#edit-consumer-secret")[0].get('value'),
            authdoc.cssselect("#edit-access-token")[0].get('value'),
            authdoc.cssselect("#edit-access-token-secret")[0].get('value'),
        )

    # All four values are taken from the same application or not at all, so
    # an application that fails halfway leaves nothing behind.
    c_k, c_s, a_t, a_t_s = 0, 0, 0, 0
    for app in appsdoc.cssselect("#content-main ul.apps-list li"):
        try:
            c_k, c_s, a_t, a_t_s = scrape(app)
        except (IndexError, HTTPError):
            # Best effort: skip this application and try the next one
            continue
        break

    if not c_k:
        raise Exception("consumer key at {} not found".format(appsurl))

    auth = tweepy.OAuthHandler(c_k, c_s)
    auth.set_access_token(a_t, a_t_s)
    self.auth = auth
def _login(self, username, password):
    """Log in on the web page, passing two cookies along explicitly.

    After a first post of the login form, the MACHINEID and TAUID pairs are
    sliced out of the ``Set-Cookie`` header and appended to the opener as a
    ``Cookie`` header; the form is then posted once more.

    @param username: username to log in with
    @param password: password
    """
    url = LOGIN_URL.format(paper=self.paper)
    page = self.getdoc(url)
    form = toolkit.parse_form(page)
    form["username"] = str(username)
    form["password"] = str(password)
    page = self.open(url, urlencode(form))
    cookies = page.info()["Set-Cookie"]

    def pair_at(start):
        """Return the header text from `start` up to the next ';'."""
        return cookies[start:cookies.find(";", start)]

    # First MACHINEID occurrence, last TAUID occurrence
    machineid = pair_at(cookies.find("MACHINEID"))
    tauid = pair_at(cookies.rfind("TAUID"))

    cookieheader = machineid + "; " + tauid
    self.opener.opener.addheaders.append(("Cookie", cookieheader))

    # Repeat the post with the cookie header in place
    self.open(url, urlencode(form))
def _login(self, username, password):
    """Fill in 'Email' and 'Passwd' on the LOGIN_URL form and post it.

    @param username: value for the form's 'Email' field
    @param password: value for the form's 'Passwd' field
    """
    fields = toolkit.parse_form(self.getdoc(LOGIN_URL))
    fields['Email'] = username
    fields['Passwd'] = password
    self.open(LOGIN_URL, urlencode(fields))
def _login(self, username, password):
    """Open the base page, then post the 'form#command' login form.

    @param username: value for the form's 'username' field
    @param password: value for the form's 'password' field
    """
    self.open(self.base_url)

    login_doc = self.getdoc(self.login_url)
    fields = parse_form(login_doc.cssselect("form#command")[0])
    fields['username'] = username
    fields['password'] = password

    self.open(self.login_url, urlencode(fields))