def login(self, username, password):
    """Log in through the Facebook OAuth dialog and store the access token.

    The flow is: open the OAuth dialog for ``self.CLIENT_ID``, submit the
    login form without following redirects, follow the returned
    ``Location`` by hand, submit the confirmation form, then pull the
    ``access_token`` out of the final response body.

    On success ``self.access_token`` holds the token and ``self.info``
    holds the result of ``self.request('/me')``.

    :param username: value put in the form's ``email`` field
    :param password: value put in the form's ``pass`` field
    :raises BrowserIncorrectPassword: if the login POST does not redirect,
        or if the page reached after the redirect contains a
        ``td/div`` element with class ``s`` (its text becomes the message)
    :raises ParseError: if no ``access_token`` is found in the last response
    """
    oauth_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=fbconnect://success&scope=email,user_birthday,user_friends,public_profile,user_photos,user_likes&response_type=token' % self.CLIENT_ID
    self.location(oauth_url)

    login_form = HTMLPage(self, self.response).get_form('//form[@id="login_form"]')
    login_form['email'] = username
    login_form['pass'] = password
    # Redirects are followed manually so the Location header can be checked.
    login_form.submit(allow_redirects=False)

    if 'Location' not in self.response.headers:
        raise BrowserIncorrectPassword()
    self.location(self.response.headers['Location'])

    confirm_page = HTMLPage(self, self.response)
    # NOTE(review): presumably this element is Facebook's error box -- confirm.
    error_xpath = '//td/div[has-class("s")]'
    if confirm_page.doc.xpath(error_xpath):
        raise BrowserIncorrectPassword(CleanText(error_xpath)(confirm_page.doc))

    confirm_page.get_form(nr=0, submit='//input[@name="__CONFIRM__"]').submit()

    token_match = re.search('access_token=([^&]+)&', self.response.text)
    if not token_match:
        raise ParseError('Unable to find access_token')

    self.access_token = token_match.group(1)
    self.info = self.request('/me')
def login(self, username, password):
    """Log in through the Facebook v2.6 OAuth dialog and store the token.

    The dialog URL is a fixed query string with ``self.CLIENT_ID`` spliced
    in as the ``client_id`` parameter.  The login form (the page's default
    form) is submitted without following redirects, the ``Location`` is
    followed by hand, the confirmation form is submitted, and the
    ``access_token`` is read from the final response body.

    On success ``self.access_token`` holds the token and ``self.info``
    holds the result of ``self.request('/me')``.

    :param username: value put in the form's ``email`` field
    :param password: value put in the form's ``pass`` field
    :raises BrowserIncorrectPassword: if the login POST does not redirect,
        or if the page reached after the redirect contains a
        ``td/div`` element with class ``s`` (its text becomes the message)
    :raises ParseError: if no ``access_token`` is found in the last response
    """
    url_head = 'https://www.facebook.com/v2.6/dialog/oauth?redirect_uri=fb464891386855067%3A%2F%2Fauthorize%2F&display=touch&state=%7B%22challenge%22%3A%22IUUkEUqIGud332lfu%252BMJhxL4Wlc%253D%22%2C%220_auth_logger_id%22%3A%2230F06532-A1B9-4B10-BB28-B29956C71AB1%22%2C%22com.facebook.sdk_client_state%22%3Atrue%2C%223_method%22%3A%22sfvc_auth%22%7D&scope=user_birthday%2Cuser_photos%2Cuser_education_history%2Cemail%2Cuser_relationship_details%2Cuser_friends%2Cuser_work_history%2Cuser_likes&response_type=token%2Csigned_request&default_audience=friends&return_scopes=true&auth_type=rerequest&client_id='
    url_tail = '&ret=login&sdk=ios&logger_id=30F06532-A1B9-4B10-BB28-B29956C71AB1&ext=1470840777&hash=AeZqkIcf-NEW6vBd'
    self.location(url_head + self.CLIENT_ID + url_tail)

    login_form = HTMLPage(self, self.response).get_form()
    login_form['email'] = username
    login_form['pass'] = password
    # Redirects are followed manually so the Location header can be checked.
    login_form.submit(allow_redirects=False)

    response_headers = self.response.headers
    if 'Location' not in response_headers:
        raise BrowserIncorrectPassword()
    self.location(response_headers['Location'])

    confirm_page = HTMLPage(self, self.response)
    # NOTE(review): presumably this element is Facebook's error box -- confirm.
    error_xpath = '//td/div[has-class("s")]'
    if confirm_page.doc.xpath(error_xpath):
        raise BrowserIncorrectPassword(CleanText(error_xpath)(confirm_page.doc))

    confirm_form = confirm_page.get_form(nr=0, submit='//input[@name="__CONFIRM__"]')
    confirm_form.submit()

    token_match = re.search('access_token=([^&]+)&', self.response.text)
    if token_match is None:
        raise ParseError('Unable to find access_token')

    self.access_token = token_match.group(1)
    self.info = self.request('/me')
def login(self, username, password):
    """Log in through the Facebook v2.9 OAuth popup and store the token.

    The login form is submitted without following redirects, with one
    extra session header and two extra cookies set just before the POST.
    The ``Location`` is then followed by hand and the ``access_token`` is
    read from the text of the first ``<script>`` element of that page.

    On success ``self.access_token`` holds the token.

    :param username: value put in the form's ``email`` field
    :param password: value put in the form's ``pass`` field
    :raises BrowserIncorrectPassword: if the login POST does not redirect,
        or if the page reached after the redirect contains a
        ``td/div`` element with class ``s`` (its text becomes the message)
    :raises ParseError: if the page has no ``<script>`` element, the first
        one is empty, or it contains no ``access_token``
    """
    self.location('https://www.facebook.com/v2.9/dialog/oauth?app_id=484681304938818&auth_type=rerequest&channel_url=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D44%23cb%3Df33dd8340f36618%26domain%3Dwww.okcupid.com%26origin%3Dhttps%253A%252F%252Fwww.okcupid.com%252Ff5818a5f355be8%26relation%3Dopener&client_id=484681304938818&display=popup&domain=www.okcupid.com&e2e=%7B%7D&fallback_redirect_uri=https%3A%2F%2Fwww.okcupid.com%2Flogin&locale=en_US&origin=1&redirect_uri=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D44%23cb%3Df2ce4ca90b82cb4%26domain%3Dwww.okcupid.com%26origin%3Dhttps%253A%252F%252Fwww.okcupid.com%252Ff5818a5f355be8%26relation%3Dopener%26frame%3Df3f40f304ac5e9&response_type=token%2Csigned_request&scope=email%2Cuser_birthday%2Cuser_photos&sdk=joey&version=v2.9')

    page = HTMLPage(self, self.response)
    form = page.get_form('//form[@id="login_form"]')
    form['email'] = username
    form['pass'] = password

    # NOTE(review): hard-coded header/cookie values, presumably copied from
    # a real browser session so the POST is accepted -- confirm.
    self.session.headers['cookie-installing-permission'] = 'required'
    self.session.cookies['wd'] = '640x1033'
    self.session.cookies['act'] = '1563018648141%2F0'

    # Redirects are followed manually so the Location header can be checked.
    form.submit(allow_redirects=False)
    if 'Location' not in self.response.headers:
        raise BrowserIncorrectPassword()
    self.location(self.response.headers['Location'])

    page = HTMLPage(self, self.response)
    error_xpath = '//td/div[has-class("s")]'
    if page.doc.xpath(error_xpath):
        raise BrowserIncorrectPassword(CleanText(error_xpath)(page.doc))

    # Only the first <script> is inspected.  A page without any script, or
    # whose first script has no text, is reported as a parse failure rather
    # than leaking an IndexError/TypeError.
    scripts = page.doc.xpath('//script')
    script_text = scripts[0].text if scripts else None
    m = re.search('access_token=([^&]+)&', script_text or '')
    if not m:
        raise ParseError('Unable to find access_token')
    self.access_token = m.group(1)
def build_doc(self, content):
    """Build the document, unwrapping HTML carried in a ``data`` entry.

    The content is first parsed with ``JsonPage.build_doc``.  If the result
    has a ``data`` entry, that value is HTML: it is encoded with
    ``self.encoding`` and parsed by a fresh ``HTMLPage``, whose document is
    returned.  Otherwise the parsed result is returned unchanged.

    :param content: raw response content
    :return: the HTML document built from ``data``, or the parsed content
    """
    parsed = JsonPage.build_doc(self, content)
    if 'data' not in parsed:
        return parsed

    # HTMLPage.build_doc() reads its input through BytesIO, so the HTML
    # string has to be encoded to bytes before being handed over.
    return HTMLPage(self.browser, self.response).build_doc(
        parsed['data'].encode(self.encoding))
def build_doc(self, content):
    """Return the JSON passed to ``syntheseController.init(...)`` in the page.

    As a side effect, the page parsed as HTML is kept in ``self.html_doc``.

    :param content: page content as text; it must contain the
        ``syntheseController.init(...)'>`` marker
    :return: the first JSON value decoded from the captured argument
    """
    # The HTML document is kept so the number of spaces can be counted later.
    self.html_doc = HTMLPage(self.browser, self.response).doc

    # The accounts list is embedded in an HTML attribute as the argument of
    # syntheseController.init(); capture that argument.
    init_call = re.search(r"syntheseController\.init\((.*)\)'>", content)
    payload = init_call.group(1)

    # raw_decode() parses the leading JSON value and tolerates anything left
    # after it in the captured text.
    # To debug the accounts JSON, dump `accounts` with json.dumps() here.
    accounts, _end = json.JSONDecoder().raw_decode(payload)
    return accounts
def get_profile(self, id):
    """Fetch popularity counters and coordinates from a member's profile.

    Returns an empty dict without any request when the local hour is 18
    or later, or is 0 (the reason for this window is not visible here).
    If the first fetch fails with ``BrowserUnavailable`` or lands on a URL
    that is not a profile URL, ``self.login()`` is called and the fetch is
    retried once.

    :param id: profile identifier, interpolated into the profile URL
        (the name shadows the builtin but is kept for callers)
    :return: ``{}`` when nothing could be fetched; otherwise a dict with
        ``'popu'`` (lower-cased row header -> int) and, when found in an
        inline script, ``'lat'`` and ``'lng'`` as floats
    """
    profile = {}

    # Read the clock once so both comparisons use the same instant.
    hour = datetime.now().hour
    if hour >= 18 or hour < 1:
        return profile

    profile_url = 'https://www.adopteunmec.com/profile/%s' % id

    r = None
    try:
        r = self.open(profile_url)
    except BrowserUnavailable:
        pass

    # Landing anywhere other than a profile URL is treated as "need to log in".
    if r is None or not re.match(r'https://www.adopteunmec.com/profile/\d+', r.url):
        self.login()
        try:
            r = self.open(profile_url)
        except BrowserUnavailable:
            r = None

    if r is None:
        return {}

    doc = HTMLPage(self, r).doc

    profile['popu'] = {}
    for tr in doc.xpath('//div[@id="popularity"]//tr'):
        cols = tr.findall('td')
        # Skip rows with no value cell (e.g. header-only rows) or empty cells.
        if not cols or not cols[0].text:
            continue
        digits = re.sub(u'[^0-9]+', u'', cols[0].text).strip()
        if not digits:
            # The cell holds text but no number; nothing to record.
            continue
        key = CleanText('./th')(tr).strip().lower()
        profile['popu'][key] = int(digits)

    # Coordinates are embedded in inline JavaScript as 'memberLat': <float>,
    for script in doc.xpath('//script'):
        text = script.text
        if text is None:
            continue
        m = re.search(r"'memberLat'\s*:\s*([\-\d\.]+),", text, re.IGNORECASE)
        if m:
            profile['lat'] = float(m.group(1))
        m = re.search(r"'memberLng'\s*:\s*([\-\d\.]+),", text, re.IGNORECASE)
        if m:
            profile['lng'] = float(m.group(1))

    return profile
def login(self, username, password):
    """Log in through the Facebook OAuth dialog and read the token from the URL.

    The login form is submitted (with ``persistent`` set to 1) without
    following redirects, then the ``Location`` is followed by hand.  The
    ``access_token`` is taken from the URL finally reached.

    ``self.info`` is always set from ``self.request('/me')``.  When no
    token is found in the URL, ``self.access_token`` is left untouched and
    no error is raised here.

    :param username: value put in the form's ``email`` field
    :param password: value put in the form's ``pass`` field
    :raises BrowserIncorrectPassword: if the login POST does not redirect
    """
    oauth_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=https://www.facebook.com/connect/login_success.html&scope=email,user_birthday,user_friends,public_profile,user_photos,user_likes&response_type=token' % self.CLIENT_ID
    self.location(oauth_url)

    login_form = HTMLPage(self, self.response).get_form('//form[@id="login_form"]')
    for field, value in (('email', username), ('pass', password), ('persistent', 1)):
        login_form[field] = value
    # Redirects are followed manually so the Location header can be checked.
    login_form.submit(allow_redirects=False)

    response_headers = self.response.headers
    if 'Location' not in response_headers:
        raise BrowserIncorrectPassword()
    self.location(response_headers['Location'])

    token_match = re.search('access_token=([^&]+)&', self.url)
    if token_match is not None:
        self.access_token = token_match.group(1)

    self.info = self.request('/me')
def urlinfo(self, url, maxback=2):
    """Return the content type, human-readable size and title of a URL.

    A ``HEAD`` request is tried first.  On a 404, if the URL ends with a
    non-alphanumeric character, that character is dropped and the lookup
    retried, at most ``maxback`` times (useful when trailing punctuation
    was glued to the URL).  If the server answers 501 or 405 to ``HEAD``,
    a normal request is made instead.

    For HTML resources the body is fetched, the size is recomputed from
    it, and the title is taken from ``<head><title>``.  For hosts ending
    in ``twitter.com`` the tweet text replaces the title when present.
    ``mobile.twitter.com`` URLs are rewritten to ``twitter.com`` first.

    :param url: the URL to inspect
    :param maxback: how many trailing characters may be stripped on 404
    :return: ``(content_type, human_size, title)``; any item may be ``None``
    :raises HTTPNotFound: on a 404 that cannot be retried
    :raises BrowserHTTPError: on any other HTTP error except 501/405
    """
    if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
        url = url.replace('mobile.twitter.com', 'twitter.com', 1)

    try:
        r = self.open(url, method='HEAD')
        body = False
    except HTTPNotFound:
        if maxback and url and not url[-1].isalnum():
            return self.urlinfo(url[:-1], maxback - 1)
        # Bare raise keeps the original traceback.
        raise
    except BrowserHTTPError as e:
        # Only "HEAD not supported" answers fall back to a full request.
        if e.response.status_code not in (501, 405):
            raise
        r = self.open(url)
        body = True

    content_type = r.headers.get('Content-Type')

    try:
        hsize = self.human_size(int(r.headers.get('Content-Length')))
    except (TypeError, ValueError):
        # Header missing (TypeError) or not a number (ValueError).
        hsize = None

    if content_type:
        is_html = 'html' in content_type
    else:
        # No Content-Type: fall back on the extension.  The pattern is
        # anchored at the end, so it must be searched for; re.match would
        # only try it at the start of the URL and never succeed.
        is_html = bool(re.search(r'\.x?html?$', url))

    title = None
    if is_html:
        if not body:
            r = self.open(url)
        # Update the size, as we might not have it from the headers.
        hsize = self.human_size(len(r.content))

        page = HTMLPage(self, r)

        # If several elements match, the last one wins.
        for node in page.doc.xpath('//head/title'):
            title = ' '.join(to_unicode(node.text_content()).strip().split())

        if urlparse.urlsplit(url).netloc.endswith('twitter.com'):
            for node in page.doc.getroot().cssselect(
                    '.permalink-tweet .tweet-text'):
                title = ' '.join(
                    to_unicode(node.text_content()).strip().splitlines())

    return content_type, hsize, title
def login(self, username, password):
    """Log in through the Facebook OAuth dialog and read the token from the URL.

    Before the login form is submitted, every inline script of the login
    page is scanned for a ``"_js_datr","<value>"`` pair and each value
    found is stored as the ``_js_datr`` session cookie.  The form is then
    submitted (with ``persistent`` set to 1) without following redirects,
    and the ``Location`` is followed by hand.

    ``self.info`` is always set from ``self.request('/me')``.  When no
    token is found in the final URL, ``self.access_token`` is left
    untouched and no error is raised here.

    :param username: value put in the form's ``email`` field
    :param password: value put in the form's ``pass`` field
    :raises BrowserIncorrectPassword: if the login POST does not redirect
    """
    oauth_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=https://www.facebook.com/connect/login_success.html&scope=basic_info,email,public_profile,user_about_me,user_activities,user_birthday,user_education_history,user_friends,user_interests,user_likes,user_location,user_photos,user_relationship_details&response_type=token' % self.CLIENT_ID
    self.location(oauth_url)

    login_page = HTMLPage(self, self.response)
    login_form = login_page.get_form('//form[@id="login_form"]')
    for field, value in (('email', username), ('pass', password), ('persistent', 1)):
        login_form[field] = value

    # Copy the _js_datr value from the page's JavaScript into the cookies.
    for script_node in login_page.doc.xpath('//script'):
        datr_match = re.search('"_js_datr","([^"]+)"', script_node.text or '')
        if datr_match is not None:
            self.session.cookies.set('_js_datr', datr_match.group(1))

    # Redirects are followed manually so the Location header can be checked.
    login_form.submit(allow_redirects=False)

    response_headers = self.response.headers
    if 'Location' not in response_headers:
        raise BrowserIncorrectPassword()
    self.location(response_headers['Location'])

    token_match = re.search('access_token=([^&]+)&', self.url)
    if token_match is not None:
        self.access_token = token_match.group(1)

    self.info = self.request('/me')
def build_doc(self, text):
    """Build an HTML document from the ``top`` and ``tab`` fragments.

    The parent class parses ``text`` first.  The ``top`` and ``tab``
    entries of that result are concatenated, encoded with
    ``self.encoding`` and parsed by a fresh ``HTMLPage``.

    :param text: raw response content
    :return: the document built from the two concatenated fragments
    """
    parsed = super(TrackPage, self).build_doc(text)
    fragments = (parsed['top'], parsed['tab'])
    markup = ''.join(fragments)
    return HTMLPage(self.browser, self.response).build_doc(
        markup.encode(self.encoding))