def login(self, login, password):
    """Authenticate through the bank's JSON '/step' API.

    Posts a PASSWORD_LOOKUP validation unit; if the server then asks for
    the terms of use to be accepted, validates them and retries, and
    finally follows the SAML post-authentication redirect.

    :param login: user identifier (uppercased before being sent)
    :param password: user password
    :raises BrowserIncorrectPassword: if authentication fails
    """
    payload = {
        'validate': {
            'PASSWORD_LOOKUP': [{
                'id': self.form_id,
                'login': login.encode(self.browser.ENCODING).upper(),
                'password': password.encode(self.browser.ENCODING),
                'type': 'PASSWORD_LOOKUP'
            }]
        }
    }
    req = self.browser.request_class(self.request_url + '/step')
    req.add_header('Content-Type', 'application/json')
    r = self.browser.openurl(req, json.dumps(payload))
    doc = json.load(r)
    self.logger.debug(doc)

    if 'phase' in doc and doc['phase']['state'] == 'TERMS_OF_USE':
        # Got:
        # {u'phase': {u'state': u'TERMS_OF_USE'}, u'validationUnits': [{u'LIST_OF_TERMS': [{u'type': u'TERMS', u'id': u'b7f28f91-7aa0-48aa-8028-deec13ae341b', u'reference': u'CGU_CYBERPLUS'}]}]}
        # Strip the 'reference' key before echoing the validation unit
        # back, as the other login() implementation in this file does:
        # the server does not accept it in the 'validate' payload.
        if 'reference' in doc['validationUnits'][0]:
            del doc['validationUnits'][0]['reference']
        payload = {'validate': doc['validationUnits'][0]}
        req = self.browser.request_class(self.request_url + '/step')
        req.add_header('Content-Type', 'application/json')
        r = self.browser.openurl(req, json.dumps(payload))
        doc = json.load(r)
        self.logger.debug(doc)

    if ('phase' in doc and doc['phase']['previousResult'] == 'FAILED_AUTHENTICATION') or \
       doc['response']['status'] != 'AUTHENTICATION_SUCCESS':
        raise BrowserIncorrectPassword()

    self.browser.location(doc['response']['saml2_post']['action'],
                          urllib.urlencode({'SAMLResponse': doc['response']['saml2_post']['samlResponse']}))
def get_current_direct_large(self, _id):
    """Return the (artist, title) pair currently broadcast by station *_id*.

    Reads the FIP player JSON feed hosted on the station's domain.
    """
    domain = self.id2domain(_id)
    feed = self.openurl('http://%s/sites/default/files/import_si/si_titre_antenne/FIP_player_current.json' % domain)
    song = json.load(feed)['current']['song']
    return (unicode(song['interpreteMorceau']), unicode(song['titre']))
def on_loaded(self):
    """Check for a password-renewal warning, then resolve async-loaded divs.

    Any <div id="as_..."> that still contains a loading spinner is filled
    in by querying the /AsynchAjax endpoint and splicing the returned HTML
    fragments into the document in place.

    :raises BrowserIncorrectPassword: if the page asks the user to renew
        his password.
    """
    warn = self.document.xpath('//div[@id="message_renouvellement_mot_passe"]')
    if len(warn) > 0:
        raise BrowserIncorrectPassword(warn[0].text)

    # load content of loading divs.
    divs = []
    for div in self.document.xpath('//div[starts-with(@id, "as_")]'):
        loading = div.xpath('.//span[@class="loading"]')
        if len(loading) == 0:
            # no spinner: content is already present, nothing to fetch
            continue
        input = div.xpath('.//input')[0]
        divs.append([div, input.attrib['name']])

    if len(divs) > 0:
        # Build the keyN/divN query string expected by the endpoint.
        args = {}
        for i, (div, name) in enumerate(divs):
            args['key%s' % i] = name
            args['div%s' % i] = div.attrib['id']
        args['time'] = 0
        r = self.browser.openurl(self.browser.buildurl('/AsynchAjax', **args))
        data = json.load(r)

        # Replace each placeholder div's content with the returned HTML.
        # NOTE(review): fragments are matched to divs by position, which
        # assumes the server answers in request order -- TODO confirm.
        for i, (div, name) in enumerate(divs):
            html = data['data'][i]['flux']
            div.clear()
            div.insert(0, etree.fromstring(html, parser=etree.HTMLParser()))
def load_async(self, time):
    """Fetch asynchronously-loaded page fragments and splice them in place.

    Collects the hidden inputs whose id starts with "asynch", queries
    /AsynchAjax with their name/value pairs, and replaces each matching
    div's content with the HTML returned in the fragment's 'flux' field.
    When the server answers with a 'time' field, waits that many
    milliseconds and polls again recursively.

    :param time: value of the 'time' query parameter sent to the endpoint.
    """
    # load content of loading divs.
    lst = self.document.xpath(
        '//input[@type="hidden" and starts-with(@id, "asynch")]')
    if len(lst) > 0:
        params = {}
        for i, input in enumerate(lst):
            params['key%s' % i] = input.attrib['name']
            params['div%s' % i] = input.attrib['value']
        params['time'] = time
        r = self.browser.openurl(
            self.browser.buildurl('/AsynchAjax', **params))
        data = json.load(r)

        for i, d in enumerate(data['data']):
            div = self.document.xpath('//div[@id="%s"]' % d['key'])[0]
            html = d['flux']
            div.clear()
            div.attrib['id'] = d[
                'key']  # needed because clear removes also all attributes
            div.insert(0, etree.fromstring(html, parser=etree.HTMLParser()))

        if 'time' in data:
            # The server asked us to poll again after a delay (in ms).
            sleep(float(data['time']) / 1000.0)
            return self.load_async(time)
def login(self, login, password):
    """Authenticate against the JSON '/step' endpoint and follow the SAML redirect.

    :param login: user identifier (uppercased before being sent)
    :param password: user password
    :raises BrowserIncorrectPassword: if authentication fails
    """
    unit = {
        'id': self.form_id,
        'login': login.encode(self.browser.ENCODING).upper(),
        'password': password.encode(self.browser.ENCODING),
        'type': 'PASSWORD_LOOKUP',
    }
    body = json.dumps({'validate': {'PASSWORD_LOOKUP': [unit]}})

    request = self.browser.request_class(self.request_url + '/step')
    request.add_header('Content-Type', 'application/json')
    doc = json.load(self.browser.openurl(request, body))
    self.logger.debug(doc)

    # A 'phase' entry flagging a failed previous attempt, or any status
    # other than success, means the credentials were rejected.
    failed = 'phase' in doc and doc['phase']['previousResult'] == 'FAILED_AUTHENTICATION'
    if failed or doc['response']['status'] != 'AUTHENTICATION_SUCCESS':
        raise BrowserIncorrectPassword()

    saml = doc['response']['saml2_post']
    self.browser.location(saml['action'],
                          urllib.urlencode({'SAMLResponse': saml['samlResponse']}))
def set_details(self, v):
    """Populate video object *v* from the page and vimeo's player config.

    Scrapes title, date and thumbnail from the HTML when present, then
    fills the rest (title/thumbnail fallbacks, duration, stream URL) from
    the JSON config descriptor, picking the best available codec and
    quality.

    :param v: video object to fill; v.id must already be set.
    :raises BrokenPageError: if the JSON config cannot be fetched or no
        known codec is offered.
    """
    # try to get as much from the page itself
    obj = self.parser.select(self.document.getroot(), 'h1[itemprop=name]')
    if len(obj) > 0:
        v.title = unicode(obj[0].text)
    obj = self.parser.select(self.document.getroot(), 'meta[itemprop=dateCreated]')
    if len(obj) > 0:
        v.date = parse_dt(obj[0].attrib['content'])
    #obj = self.parser.select(self.document.getroot(), 'meta[itemprop=duration]')
    obj = self.parser.select(self.document.getroot(), 'meta[itemprop=thumbnailUrl]')
    if len(obj) > 0:
        v.thumbnail = Thumbnail(unicode(obj[0].attrib['content']))

    # for the rest, use the JSON config descriptor
    json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", ""))
    data = json.load(json_data)
    if data is None:
        raise BrokenPageError('Unable to get JSON config for id: %r' % int(v.id))
    #print data
    if v.title is None:
        v.title = unicode(data['video']['title'])
    if v.thumbnail is None:
        v.thumbnail = Thumbnail(unicode(data['video']['thumbnail']))
    v.duration = datetime.timedelta(seconds=int(data['video']['duration']))

    # determine available codec and quality
    # use highest quality possible
    # (later assignments win: h264 is preferred over vp8 over vp6)
    quality = 'sd'
    codec = None
    if 'vp6' in data['video']['files']:
        codec = 'vp6'
    if 'vp8' in data['video']['files']:
        codec = 'vp8'
    if 'h264' in data['video']['files']:
        codec = 'h264'
    if not codec:
        raise BrokenPageError('Unable to detect available codec for id: %r' % int(v.id))
    if 'hd' in data['video']['files'][codec]:
        quality = 'hd'
    v.url = unicode("http://player.vimeo.com/play_redirect?quality=%s&codecs=%s&clip_id=%d&time=%s&sig=%s&type=html5_desktop_local" % (quality, codec, int(v.id), data['request']['timestamp'] , data['request']['signature']))

    # attempt to determine the redirected URL to pass it instead
    # since the target server doesn't check for User-Agent, unlike
    # for the source one.
    # HACK: we use mechanize directly here for now... FIXME
    #print "asking for redirect on '%s'" % (v.url)
    self.browser.set_handle_redirect(False)
    try:
        self.browser.open_novisit(v.url)
    except HTTPError, e:
        # A 302 is the expected outcome: take the redirect target as the
        # final stream URL.
        if e.getcode() == 302 and hasattr(e, 'hdrs'):
            #print e.hdrs['Location']
            v.url = unicode(e.hdrs['Location'])
def get_current_direct(self, _id):
    """Return the (artist, title) currently on air for station *_id*.

    Either element may be None when the feed's HTML fragment lacks it.
    """
    url = 'http://%s/sites/default/files/direct.json?_=%s' % (self.id2domain(_id), int(time()))
    payload = json.load(self.openurl(url))
    fragment = self.parser.parse(StringIO(payload.get('html')))
    artist = fragment.findtext('//span[@class="artiste"]')
    title = fragment.findtext('//span[@class="titre"]')
    return (unicode(artist) if artist else None,
            unicode(title) if title else None)
def get_full_profile(self, id):
    """Return the full profile dict for *id*.

    When a girl-proxy endpoint is configured, fetches the JSON profile
    through it and annotates it with the distance computed from its
    lat/lng fields; otherwise falls back to the regular profile lookup.
    """
    if self.GIRL_PROXY is None:
        return self.get_profile(id)

    response = self.openurl(self.GIRL_PROXY % id)
    profile = json.load(response)
    if 'lat' in profile and 'lng' in profile:
        profile['dist'] = self.get_dist(profile['lat'], profile['lng'])
    return profile
def login(self, login, password):
    """Authenticate through the bank's JSON '/step' API.

    Posts a PASSWORD_LOOKUP validation unit; if the server then asks for
    the terms of use to be accepted, validates them and retries, and
    finally follows the SAML post-authentication redirect.

    :param login: user identifier (uppercased before being sent)
    :param password: user password
    :raises BrowserIncorrectPassword: if authentication fails
    """
    payload = {
        'validate': {
            'PASSWORD_LOOKUP': [{
                'id': self.form_id,
                'login': login.encode(self.browser.ENCODING).upper(),
                'password': password.encode(self.browser.ENCODING),
                'type': 'PASSWORD_LOOKUP'
            }]
        }
    }
    req = self.browser.request_class(self.request_url + '/step')
    req.add_header('Content-Type', 'application/json')
    r = self.browser.openurl(req, json.dumps(payload))
    doc = json.load(r)
    self.logger.debug(doc)

    if 'phase' in doc and doc['phase']['state'] == 'TERMS_OF_USE':
        # Got:
        # {u'phase': {u'state': u'TERMS_OF_USE'}, u'validationUnits': [{u'LIST_OF_TERMS': [{u'type': u'TERMS', u'id': u'b7f28f91-7aa0-48aa-8028-deec13ae341b', u'reference': u'CGU_CYBERPLUS'}]}]}
        # The 'reference' key must not be echoed back in the 'validate'
        # payload, so drop it before re-posting.
        if 'reference' in doc['validationUnits'][0]:
            del doc['validationUnits'][0]['reference']
        payload = {'validate': doc['validationUnits'][0]}
        req = self.browser.request_class(self.request_url + '/step')
        req.add_header('Content-Type', 'application/json')
        r = self.browser.openurl(req, json.dumps(payload))
        doc = json.load(r)
        self.logger.debug(doc)

    if ('phase' in doc and doc['phase']['previousResult'] == 'FAILED_AUTHENTICATION') or \
       doc['response']['status'] != 'AUTHENTICATION_SUCCESS':
        raise BrowserIncorrectPassword()

    self.browser.location(
        doc['response']['saml2_post']['action'],
        urllib.urlencode({
            'SAMLResponse': doc['response']['saml2_post']['samlResponse']
        }))
def iter_gallery_images(self, gallery):
    """Yield one image object per page of *gallery*.

    Fetches the page list from izneo's player endpoint and builds the
    direct image URL for each page at the maximum supported width.

    :param gallery: gallery object the images belong to.
    """
    ean = self.get_ean()
    pages = json.load(self.browser.openurl(
        'http://www.izneo.com/playerv2/ajax.php?ean=%s&action=get_list_jpg' % ean))
    # NOTE(review): this endpoint has been scraped both as a bare JSON
    # list and as an object wrapping the pages under a 'list' key (the
    # two copies of this method in this file disagree); accept both.
    if isinstance(pages, dict):
        pages = pages['list']
    width = 1200  # maximum width
    for page in pages:
        yield BaseImage(page['page'],
                        gallery=gallery,
                        url=("http://www.izneo.com/playerv2/%s/%s/%s/%d/%s"
                             % (page['expires'], page['token'], ean, width, page['page'])))
def get_current_direct(self, _id):
    """Return (artist, title) for what is currently playing on *_id*.

    Missing fields in the embedded HTML fragment yield None.
    """
    feed = self.openurl(
        'http://%s/sites/default/files/direct.json?_=%s'
        % (self.id2domain(_id), int(time())))
    document = self.parser.parse(StringIO(json.load(feed).get('html')))

    def _text(xpath):
        value = document.findtext(xpath)
        return unicode(value) if value else None

    return (_text('//span[@class="artiste"]'), _text('//span[@class="titre"]'))
def iter_gallery_images(self, gallery):
    """Yield one image object per page of *gallery*.

    Fetches the page list from izneo's player endpoint and builds the
    direct image URL for each page at the maximum supported width.

    :param gallery: gallery object the images belong to.
    """
    ean = self.get_ean()
    pages = json.load(self.browser.openurl(
        'http://www.izneo.com/playerv2/ajax.php?ean=%s&action=get_list_jpg' % ean))
    # NOTE(review): this endpoint has been scraped both as a bare JSON
    # list and as an object wrapping the pages under a 'list' key (the
    # two copies of this method in this file disagree); accept both.
    if isinstance(pages, dict):
        pages = pages['list']
    width = 1200  # maximum width
    for page in pages:
        yield BaseImage(page['page'],
                        gallery=gallery,
                        url=("http://www.izneo.com/playerv2/%s/%s/%s/%d/%s"
                             % (page['expires'], page['token'], ean, width, page['page'])))
def login(self, login, password):
    """Authenticate against the JSON '/step' endpoint and follow the SAML redirect.

    :param login: user identifier
    :param password: user password
    :raises BrowserIncorrectPassword: if authentication fails
    """
    lookup = [{
        'id': self.form_id,
        'login': login.encode(self.browser.ENCODING),
        'password': password.encode(self.browser.ENCODING),
        'type': 'PASSWORD_LOOKUP',
    }]
    req = self.browser.request_class(self.request_url + '/step')
    req.add_header('Content-Type', 'application/json')
    body = json.dumps({'validate': {'PASSWORD_LOOKUP': lookup}})
    doc = json.load(self.browser.openurl(req, body))

    # Either an explicit failed-authentication phase or a non-success
    # status means the credentials were rejected.
    if ('phase' in doc and doc['phase']['previousResult'] == 'FAILED_AUTHENTICATION') or \
       doc['response']['status'] != 'AUTHENTICATION_SUCCESS':
        raise BrowserIncorrectPassword()

    saml = doc['response']['saml2_post']
    self.browser.location(saml['action'],
                          urllib.urlencode({'SAMLResponse': saml['samlResponse']}))
def load_async(self, time):
    """Fetch asynchronously-loaded fragments and splice them into the page.

    Collects hidden inputs whose id starts with "asynch", queries
    /AsynchAjax with their name/value pairs, and replaces each matching
    div's content with the returned HTML. If the response carries a
    'time' field, waits that many milliseconds and polls again.

    :param time: value of the 'time' query parameter sent to the endpoint.
    """
    hidden = self.document.xpath('//input[@type="hidden" and starts-with(@id, "asynch")]')
    if not hidden:
        return

    params = {'time': time}
    for idx, field in enumerate(hidden):
        params['key%s' % idx] = field.attrib['name']
        params['div%s' % idx] = field.attrib['value']

    response = self.browser.openurl(self.browser.buildurl('/AsynchAjax', **params))
    data = json.load(response)

    for chunk in data['data']:
        target = self.document.xpath('//div[@id="%s"]' % chunk['key'])[0]
        target.clear()
        # clear() wipes the attributes too, so restore the id.
        target.attrib['id'] = chunk['key']
        target.insert(0, etree.fromstring(chunk['flux'], parser=etree.HTMLParser()))

    if 'time' in data:
        sleep(float(data['time']) / 1000.0)
        return self.load_async(time)
def get_token(self):
    """Extract the session token by walking the portal's redirect chain.

    Finds the 'vary' parameter (from the URL group dict or by scraping
    the page's scripts), calls GetuserInfo to refresh it, loads the
    integration page to recover the last connection date, then posts to
    the integrated-internet JSP and reads the token from the query string
    of the final URL.

    :return: the token string taken from the redirect URL's query.
    """
    vary = None
    if self.group_dict.get('vary', None) is not None:
        vary = self.group_dict['vary']
    else:
        # Fall back to scraping the inline scripts for the vary value.
        for script in self.document.xpath('//script'):
            if script.text is None:
                continue
            m = re.search("'vary', '([\d-]+)'\)", script.text)
            if m:
                vary = m.group(1)
                break

    r = self.browser.openurl(self.browser.request_class(self.browser.buildurl(self.browser.absurl("/portailinternet/_layouts/Ibp.Cyi.Application/GetuserInfo.ashx"), action='UInfo', vary=vary), None, {'Referer': self.url}))
    doc = json.load(r)

    # The response embeds a fresher vary value inside 'accountContent'.
    m = re.search("vary=([\d-]+)", doc['accountContent'])
    if m:
        vary = m.group(1)
    else:
        self.logger.warning("Vary not found")

    r = self.browser.openurl(self.browser.request_class(self.browser.buildurl(self.browser.absurl('/portailinternet/Transactionnel/Pages/CyberIntegrationPage.aspx'), vary=vary), 'taskId=aUniversMesComptes', {'Referer': self.url}))
    doc = self.browser.get_document(r)

    # Scrape the last connection date required by the final POST.
    date = None
    for script in doc.xpath('//script'):
        if script.text is None:
            continue
        m = re.search('lastConnectionDate":"([^"]*)"', script.text)
        if m:
            date = m.group(1)

    # '%%3A' renders as the URL-encoded ':' once the date is interpolated.
    r = self.browser.openurl(self.browser.request_class(self.browser.absurl('/cyber/ibp/ate/portal/integratedInternet.jsp'), 'session%%3Aate.lastConnectionDate=%s&taskId=aUniversMesComptes' % date, {'Referer': r.geturl()}))
    v = urlsplit(r.geturl())
    args = dict(parse_qsl(v.query))
    return args['token']
def search_geo(self, pattern):
    """Query seloger's city autocompletion service.

    :param pattern: city name fragment; sent UTF-8 encoded.
    :return: the decoded JSON response.
    """
    url = self.buildurl('http://www.seloger.com/js,ajax,villequery_v3.htm',
                        ville=pattern.encode('utf-8'), mode=1)
    response = self.openurl(url)
    return json.load(response)
def search_geo(self, pattern):
    """Query pap.fr's geographic autocompletion service.

    :param pattern: place name fragment; sent UTF-8 encoded.
    :return: the decoded JSON response.
    """
    url = self.buildurl('http://www.pap.fr/index/ac-geo',
                        q=pattern.encode('utf-8'))
    return json.load(self.openurl(url))
def get_current_direct(self, _id):
    """Return the (artist, title) currently on air for station *_id*.

    Reads the 'rf_titre_antenne' entry of the station's direct.json feed.
    """
    url = 'http://%s/sites/default/files/direct.json?_=%s' % (self.id2domain(_id), int(time()))
    entry = json.load(self.openurl(url))['rf_titre_antenne']
    return (unicode(entry['interprete']), unicode(entry['titre']))
def on_loaded(self):
    """Fetch the authentication step descriptor and remember its form id.

    Stores the id of the first PASSWORD_LOOKUP validation unit in
    self.form_id for the subsequent login() call.
    """
    response = self.browser.openurl(self.request_url)
    descriptor = json.load(response)
    lookup = descriptor['step']['validationUnits'][0]['PASSWORD_LOOKUP']
    self.form_id = lookup[0]['id']
def search_geo(self, pattern):
    """Query pap.fr's geographic autocompletion endpoint and return its JSON.

    :param pattern: place name fragment; sent UTF-8 encoded.
    """
    query = pattern.encode('utf-8')
    return json.load(self.openurl(self.buildurl('http://www.pap.fr/index/ac-geo', q=query)))
def on_loaded(self):
    """Read the JSON authentication step and store the PASSWORD_LOOKUP form id."""
    doc = json.load(self.browser.openurl(self.request_url))
    units = doc['step']['validationUnits']
    self.form_id = units[0]['PASSWORD_LOOKUP'][0]['id']
def parse(self, data, encoding=None):
    """Deserialize the JSON document read from the file-like *data*.

    :param data: file-like object containing a JSON document.
    :param encoding: text encoding forwarded to json.load.
        NOTE(review): the 'encoding' keyword is a Python 2 feature; it
        was removed from json.load in Python 3.9.
    :return: the parsed Python object.
    """
    return json.load(data, encoding=encoding)
def set_details(self, v):
    """Populate video object *v* from the page and vimeo's player config.

    Scrapes title, date and thumbnail from the HTML when present, then
    fills the rest (title/thumbnail fallbacks, duration, stream URL) from
    the JSON config descriptor, picking the best available codec and
    quality.

    :param v: video object to fill; v.id must already be set.
    :raises BrokenPageError: if the JSON config cannot be fetched or no
        known codec is offered.
    """
    # try to get as much from the page itself
    obj = self.parser.select(self.document.getroot(), 'h1[itemprop=name]')
    if len(obj) > 0:
        v.title = unicode(obj[0].text)
    obj = self.parser.select(self.document.getroot(), 'meta[itemprop=dateCreated]')
    if len(obj) > 0:
        v.date = parse_dt(obj[0].attrib['content'])
    #obj = self.parser.select(self.document.getroot(), 'meta[itemprop=duration]')
    obj = self.parser.select(self.document.getroot(), 'meta[itemprop=thumbnailUrl]')
    if len(obj) > 0:
        v.thumbnail = Thumbnail(unicode(obj[0].attrib['content']))

    # for the rest, use the JSON config descriptor
    json_data = self.browser.openurl(
        'http://%s/config/%s?type=%s&referrer=%s' %
        ("player.vimeo.com", int(v.id), "html5_desktop_local", ""))
    data = json.load(json_data)
    if data is None:
        raise BrokenPageError('Unable to get JSON config for id: %r' % int(v.id))
    #print data
    if v.title is None:
        v.title = unicode(data['video']['title'])
    if v.thumbnail is None:
        v.thumbnail = Thumbnail(unicode(data['video']['thumbnail']))
    v.duration = datetime.timedelta(seconds=int(data['video']['duration']))

    # determine available codec and quality
    # use highest quality possible
    # (later assignments win: h264 is preferred over vp8 over vp6)
    quality = 'sd'
    codec = None
    if 'vp6' in data['video']['files']:
        codec = 'vp6'
    if 'vp8' in data['video']['files']:
        codec = 'vp8'
    if 'h264' in data['video']['files']:
        codec = 'h264'
    if not codec:
        raise BrokenPageError(
            'Unable to detect available codec for id: %r' % int(v.id))
    if 'hd' in data['video']['files'][codec]:
        quality = 'hd'
    v.url = unicode(
        "http://player.vimeo.com/play_redirect?quality=%s&codecs=%s&clip_id=%d&time=%s&sig=%s&type=html5_desktop_local"
        % (quality, codec, int(v.id), data['request']['timestamp'],
           data['request']['signature']))

    # attempt to determine the redirected URL to pass it instead
    # since the target server doesn't check for User-Agent, unlike
    # for the source one.
    # HACK: we use mechanize directly here for now... FIXME
    #print "asking for redirect on '%s'" % (v.url)
    self.browser.set_handle_redirect(False)
    try:
        self.browser.open_novisit(v.url)
    except HTTPError, e:
        # A 302 is the expected outcome: take the redirect target as the
        # final stream URL.
        if e.getcode() == 302 and hasattr(e, 'hdrs'):
            #print e.hdrs['Location']
            v.url = unicode(e.hdrs['Location'])
def search_geo(self, pattern):
    """Query seloger's city autocompletion service and return its JSON.

    :param pattern: city name fragment; sent UTF-8 encoded.
    """
    query_url = self.buildurl(
        'http://www.seloger.com/js,ajax,villequery_v3.htm',
        ville=pattern.encode('utf-8'),
        mode=1)
    return json.load(self.openurl(query_url))