def test_cookienav():
    """
    Test browsing while getting new cookies
    """
    def received(response):
        # The test reads the 'cookies' mapping out of the JSON response body.
        return json.loads(response.text)['cookies']

    cookies_url = HTTPBIN + 'cookies'
    browser = BaseBrowser()

    # a fresh browser sends no cookie
    resp = browser.location(cookies_url)
    assert len(received(resp)) == 0

    # each cookies/set/<name>/<value> call adds one cookie to the jar
    resp = browser.location(HTTPBIN + 'cookies/set/hello/world')
    jar = received(resp)
    assert len(jar) == 1
    assert jar['hello'] == 'world'
    resp = browser.location(HTTPBIN + 'cookies/set/hello2/world2')
    jar = received(resp)
    assert len(jar) == 2
    assert jar['hello2'] == 'world2'

    resp = browser.location(REQUESTBIN)
    assert 'session' in resp.cookies  # requestbin should give this by default
    assert 'hello' not in resp.cookies  # we didn't send the wrong cookie
    # return to httpbin, check we didn't give the wrong cookie
    resp = browser.location(cookies_url)
    assert 'session' not in received(resp)

    # override cookies temporarily
    resp = browser.location(cookies_url, cookies={'bla': 'bli'})
    jar = received(resp)
    assert len(jar) == 1
    assert jar['bla'] == 'bli'
    # reload, the "fake" cookie should not be there
    resp = browser.location(cookies_url)
    jar = received(resp)
    assert len(jar) == 2
    assert 'bla' not in jar
def test_cookieredirect():
    """
    Test cookie redirection security
    """
    def received(response):
        # The test reads the 'cookies' mapping out of the JSON response body.
        return json.loads(response.text)['cookies']

    rurl = choice(REDIRECTS2)

    browser = BaseBrowser()
    resp = browser.location(HTTPBIN + 'cookies')
    assert len(received(resp)) == 0

    # add a cookie to the redirection service domain (not the target!)
    browser.cookies.set(browser.cookies.build('k', 'v1', rurl))
    resp = browser.location(rurl)
    assert resp.url == HTTPBIN + 'cookies'
    # the cookie was not forwarded; it's for another domain
    # this is important for security reasons,
    # and because python-requests tries to do it by default!
    assert len(received(resp)) == 0

    # add a cookie for the target
    browser.cookies.set(browser.cookies.build('k', 'v2', HTTPBIN))
    resp = browser.location(rurl)
    assert resp.url == HTTPBIN + 'cookies'
    jar = received(resp)
    assert len(jar) == 1
    assert jar['k'] == 'v2'

    # check all cookies sent in the request chain
    assert resp.cookies == {'k': 'v2'}
    # some services add other cookies
    assert resp.history[0].cookies['k'] == 'v1'
def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
    """Run one search per entry of ``house_types`` and yield the housings found.

    Raises ``TypeNotSupported`` when ``type`` is not in ``self.TYPES``.
    ``area_max`` is accepted but is not used when building the request.
    """
    if type not in self.TYPES:
        raise TypeNotSupported

    self.update_header()
    result = self.form_item.open(data="{'rubrique': '%s'}" % self.TYPES.get(type))
    # The 'd' field of the JSON answer is itself a JSON-encoded string.
    biens = json.loads(json.loads(result.content)['d'])

    for house_type in house_types:
        id_type = self.RET[type].get(house_type, '1')

        params = {}
        params['rubrique'] = self.TYPES.get(type)
        params['ach_id'] = None
        params['FromMoteur'] = "true"

        # Copy the type descriptors of every entry whose 'Idchoix' matches.
        for bien in biens:
            if bien['Idchoix'] != int(id_type):
                continue
            params['lstSSTbien'] = bien['SsTypebien']
            params['lstTbien'] = bien['TypeBien']
            params['Caracteristique'] = bien['Idchoix']

        params['OrigineAlerte'] = "SaveSearchMoteurHome"
        params['pays'] = "fra"
        params['prix_min'] = cost_min if cost_min and cost_min > 0 else None
        params['prix_max'] = cost_max if cost_max and cost_max > 0 else None
        params['lstThemes'] = ""

        if nb_rooms:
            # every room count from the requested minimum up to 5
            params['lstNbPieces'] = ','.join('%s' % n for n in range(nb_rooms, 6))
        else:
            params['lstNbPieces'] = 0

        params['lstNbChambres'] = None
        params['surface_min'] = area_min or None
        # var localisationType = { "all": -1, "ville": 5, "region": 2, "departement": 4, "pays": 1, "regionUsuelle": 3 };
        params['localisationType'] = 5
        params['reference'] = ''
        params['rayon'] = 0
        params['localisation_id_rayon'] = None
        params['lstLocalisationId'] = ','.join(cities)
        params['photos'] = 0
        params['colocation'] = ''
        params['meuble'] = ''
        params['pageNumber'] = 1
        params['order_by'] = 1
        params['sort_order'] = 1
        params['top'] = 25
        params['SaveSearch'] = "false"
        params['EmailUser'] = ""
        params['GSMUser'] = ""

        self.search.go(data="{'p_SearchParams':'%s', 'forcealerte':'0'}" % json.dumps(params))

        page_query = '{pageIndex: 1,source:"undefined"}'
        for item in self.search_result.go(data=page_query).iter_housings():
            yield item
def test_redirects():
    """
    Check redirects are followed
    """
    def final_referer(response):
        # Referer header reported in the JSON body of the final response
        return json.loads(response.text)['headers'].get('Referer')

    def hop_referer(response, index):
        # Referer header that was sent with the index-th request of the chain
        return response.history[index].request.headers.get('Referer')

    landing = HTTPBIN + 'get'

    browser = BaseBrowser()
    browser.location(HTTPBIN + 'redirect/1')
    assert browser.url == landing

    resp = browser.location(HTTPBIN + 'redirect/1')
    assert final_referer(resp) == HTTPBIN + 'redirect/1'
    assert resp.url == landing

    # Normal redirect chain
    browser.url = None
    resp = browser.location(HTTPBIN + 'redirect/4')
    assert final_referer(resp) == HTTPBIN + 'redirect/1'
    assert len(resp.history) == 4
    # history[i] requested redirect/(4-i), coming from redirect/(5-i)
    for index in (3, 2, 1):
        assert resp.history[index].request.url == HTTPBIN + 'redirect/%d' % (4 - index)
        assert hop_referer(resp, index) == HTTPBIN + 'redirect/%d' % (5 - index)
    assert resp.history[0].request.url == HTTPBIN + 'redirect/4'
    assert hop_referer(resp, 0) is None
    assert resp.url == landing

    # Disable all referers
    resp = browser.location(HTTPBIN + 'redirect/2', referrer=False)
    assert final_referer(resp) is None
    assert len(resp.history) == 2
    assert hop_referer(resp, 1) is None
    assert hop_referer(resp, 0) is None
    assert resp.url == landing

    # Only overrides first referer
    resp = browser.location(HTTPBIN + 'redirect/2', referrer='http://example.com/')
    assert final_referer(resp) == HTTPBIN + 'redirect/1'
    assert len(resp.history) == 2
    assert hop_referer(resp, 1) == HTTPBIN + 'redirect/2'
    assert hop_referer(resp, 0) == 'http://example.com/'
    assert resp.url == landing

    # Don't follow
    resp = browser.location(HTTPBIN + 'redirect/2', allow_redirects=False)
    assert len(resp.history) == 0
    assert resp.url == HTTPBIN + 'redirect/2'
    assert resp.status_code == 302
def get_list(self):
    """Build the list of accounts from the JavaScript data embedded in the page.

    Returns an empty list when the page carries the "no account" message.
    Raises ``BrowserUnavailable`` when the accounts array cannot be found.
    """
    accounts = []
    previous_account = None

    noaccounts = self.get_from_js('_js_noMvts =', ';')
    if noaccounts is not None:
        assert 'avez aucun compte' in noaccounts
        return []

    txt = self.get_from_js('_data = new Array(', ');', is_list=True)
    if txt is None:
        raise BrowserUnavailable('Unable to find accounts list in scripts')

    # The JS array uses single quotes; swap them so the text parses as JSON.
    data = json.loads('[%s]' % txt.replace("'", '"'))

    for line in data:
        a = Account()
        a.id = line[self.COL_ID].replace(' ', '')

        if re.match(r'Classement=(.*?):::Banque=(.*?):::Agence=(.*?):::SScompte=(.*?):::Serie=(.*)', a.id):
            a.id = str(CleanDecimal().filter(a.id))

        id_parts = a.id.split('_')
        a._acc_nb = id_parts[0] if len(id_parts) > 1 else None
        a.label = MyStrip(line[self.COL_LABEL], xpath='.//div[@class="libelleCompteTDB"]')
        # This account can be multiple life insurance accounts
        if a.label == 'ASSURANCE VIE-BON CAPI-SCPI-DIVERS *':
            continue

        a.balance = Decimal(FrenchTransaction.clean_amount(line[self.COL_BALANCE]))
        a.currency = a.get_currency(line[self.COL_BALANCE])
        a.type = self.get_account_type(a.label)

        # The parent account must be created right before
        if a.type == Account.TYPE_CARD:
            # duplicate
            if find_object(accounts, id=a.id):
                self.logger.warning('Ignoring duplicate card %r', a.id)
                continue
            a.parent = previous_account

        if line[self.COL_HISTORY] == 'true':
            a._inv = False
            a._link = self.get_history_link()
            a._args = self.make__args_dict(line)
        else:
            a._inv = True
            a._args = {'_ipc_eventValue': line[self.COL_ID],
                       '_ipc_fireEvent': line[self.COL_FIRE_EVENT],
                       }
            a._link = self.doc.xpath('//form[@name="changePageForm"]')[0].attrib['action']

        # Compare by value, like the card check above: an identity test on a
        # constant only works by accident of object caching.
        if a.type == Account.TYPE_CARD:
            # For cards the displayed amount is moved to 'coming' and the balance zeroed.
            a.coming = a.balance
            a.balance = Decimal('0.0')

        accounts.append(a)
        previous_account = a

    return accounts
def iter_persons(self, pattern):
    """Search persons matching ``pattern`` and yield one ``Person`` per result.

    Yields nothing when the request returns ``None`` or when the feed has no
    "person" entry. Only the short description and thumbnail are filled in;
    the other fields are left as ``NotLoaded``.
    """
    query = [
        ("partner", self.PARTNER_KEY),
        ("q", pattern),
        ("format", "json"),
        ("filter", "person"),
    ]
    raw = self.__do_request("search", query)
    if raw is None:
        return

    feed = json.loads(raw)["feed"]
    if "person" not in feed:
        return

    for entry in feed["person"]:
        if "picture" in entry:
            thumbnail_url = unicode(entry["picture"]["href"])
        else:
            thumbnail_url = NotAvailable

        person = Person(entry["code"], unicode(entry["name"]))

        # Short description: "(birth date), activity, activity, ..."
        pieces = []
        if "birthDate" in entry:
            pieces.append("(%s)" % entry["birthDate"])
        if "activity" in entry:
            for activity in entry["activity"]:
                pieces.append("%s" % activity["$"])

        person.real_name = NotLoaded
        person.birth_place = NotLoaded
        person.birth_date = NotLoaded
        person.death_date = NotLoaded
        person.gender = NotLoaded
        person.nationality = NotLoaded
        person.short_biography = NotLoaded
        person.short_description = u", ".join(pieces).strip(", ")
        person.roles = NotLoaded
        person.thumbnail_url = thumbnail_url
        yield person
def iter_movies(self, pattern):
    """Search movies matching ``pattern`` and yield one ``Movie`` per result.

    Yields nothing when the request returns ``None`` or when the feed has no
    "movie" entry. Only the short description and thumbnail are filled in;
    the other fields are left as ``NotLoaded``.
    """
    query = [
        ("partner", self.PARTNER_KEY),
        ("q", pattern),
        ("format", "json"),
        ("filter", "movie"),
    ]
    raw = self.__do_request("search", query)
    if raw is None:
        return

    feed = json.loads(raw)["feed"]
    if "movie" not in feed:
        return

    for entry in feed["movie"]:
        # Short description: "title ; year-or-release-date ; actors"
        pieces = []
        if "title" in entry:
            pieces.append("%s" % entry["title"])
        if "productionYear" in entry:
            pieces.append("%s" % entry["productionYear"])
        elif "release" in entry:
            pieces.append("%s" % entry["release"]["releaseDate"])
        if "castingShort" in entry and "actors" in entry["castingShort"]:
            pieces.append("%s" % entry["castingShort"]["actors"])
        short_description = u" ; ".join(pieces).strip("; ")

        if "poster" in entry:
            thumbnail_url = unicode(entry["poster"]["href"])
        else:
            thumbnail_url = NotAvailable

        movie = Movie(entry["code"], unicode(entry["originalTitle"]))
        movie.other_titles = NotLoaded
        movie.release_date = NotLoaded
        movie.duration = NotLoaded
        movie.short_description = short_description
        movie.pitch = NotLoaded
        movie.country = NotLoaded
        movie.note = NotLoaded
        movie.roles = NotLoaded
        movie.all_release_dates = NotLoaded
        movie.thumbnail_url = thumbnail_url
        yield movie
def get_person_biography(self, id):
    """Return the biography of the person identified by ``id``.

    Returns ``None`` when the request yields nothing or the answer has no
    "person" entry, and ``NotAvailable`` when the person has no biography.
    """
    query = [
        ("partner", self.PARTNER_KEY),
        ("code", id),
        ("profile", "large"),
        ("mediafmt", "mp4-lc"),
        ("filter", "movie"),
        ("striptags", "biography,biographyshort"),
        ("format", "json"),
    ]

    raw = self.__do_request("person", query)
    if raw is None:
        return None

    answer = json.loads(raw)
    if "person" not in answer:
        return None

    person = answer["person"]
    if "biography" in person:
        return unicode(person["biography"])
    return NotAvailable
def confirm(self):
    """Fill in and submit the 'authentification' form using the virtual keyboard.

    Fetches the keyboard description, downloads the keyboard image, decodes the
    first six characters of the browser password into tile codes, reorders them
    through ``self.strange_map`` and submits the form.
    """
    form = self.get_form(id='authentification')

    url = self.browser.BASEURL + '//sec/vkm/gen_crypto?estSession=0'
    infos_data = self.browser.open(url).text
    # The answer is JSONP-like: _vkCallback(<payload>); -- keep only the payload.
    # Raw string: '\(' is not a valid escape sequence in a plain literal.
    infos_data = re.match(r'^_vkCallback\((.*)\);$', infos_data).group(1)
    # The payload uses single quotes; swap them so it parses as JSON.
    infos = json.loads(infos_data.replace("'", '"'))
    infos['grid'] = self.decode_grid(infos)

    url = self.browser.BASEURL + '/sec/vkm/gen_ui?modeClavier=0&cryptogramme=' + infos["crypto"]
    content = self.browser.open(url).content
    img = Captcha(BytesIO(content), infos)

    try:
        img.build_tiles()
    except TileError as err:
        # Report the failure and show the offending tile; execution continues.
        error("Error: %s" % err)
        if err.tile:
            err.tile.display()

    pwd = img.get_codes(self.browser.password[:6])
    codes = pwd.split(',')
    newpwd = ','.join(codes[self.strange_map[j]] for j in range(6))

    form['codsec'] = newpwd
    form['cryptocvcs'] = infos["crypto"].encode('iso-8859-1')
    form['vkm_op'] = 'sign'
    form.submit()
def iter_accounts(self):
    """Yield the accounts described in the page's ``window.__INITIAL_STATE__`` data.

    The state is located in the ``initial-state`` script, decoded, and checked
    against the fixed positions this parser relies on before the product list
    is walked.
    """
    script_lines = self.doc.xpath('//script[@id="initial-state"]')[0].text.split('\n')
    for line in script_lines:
        found = re.search('window.__INITIAL_STATE__ = (.*);', line)
        if found:
            # The value is a quoted string literal that itself contains JSON.
            data = json.loads(literal_eval(found.group(1)))
            break
    else:
        assert False, "data was not found"

    # Sanity checks on the hard-coded positions used below.
    assert data[13] == 'core'
    assert len(data[14]) == 3
    assert len(data[14][2]) == 85
    assert data[14][2][63] == 'products'
    assert len(data[14][2][64]) == 2
    assert data[14][2][64][1][4] == 'productsList'

    accounts_data = data[14][2][64][1][5]

    for entry in accounts_data:
        if isinstance(entry, str):
            # A string entry is the token for the list entry that follows it.
            token = entry
            continue
        if not isinstance(entry, list):
            continue

        acc = Account()
        acc.number = '-%s' % entry[2][2]
        acc.label = '%s %s' % (entry[6][4], entry[10][-1])
        acc._token = acc.id = token
        yield acc
def login(self, login, password):
    """Fetch the virtual keyboard description and build its tiles.

    The code visible here stops after the tiles are built; ``login`` and
    ``password`` are not used by it.
    """
    DOMAIN_LOGIN = self.browser.DOMAIN_LOGIN
    DOMAIN = self.browser.DOMAIN

    # NOTE(review): the '******' prefix looks like a masked URL scheme -- confirm.
    url_login = '******' + DOMAIN_LOGIN + '/index.html'

    base_url = 'https://' + DOMAIN
    url = base_url + '//sec/vk/gen_crypto?estSession=0'
    headers = {'Referer': url_login}
    request = self.browser.request_class(url, None, headers)
    infos_data = self.browser.readurl(request)
    # The answer is JSONP-like: _vkCallback(<payload>); -- keep only the payload.
    # Raw string: '\(' is not a valid escape sequence in a plain literal.
    infos_data = re.match(r'^_vkCallback\((.*)\);$', infos_data).group(1)
    # The payload uses single quotes; swap them so it parses as JSON.
    infos = json.loads(infos_data.replace("'", '"'))

    url = base_url + '//sec/vk/gen_ui?modeClavier=0&cryptogramme=' + infos["crypto"]
    img = Captcha(self.browser.openurl(url), infos)

    try:
        img.build_tiles()
    except TileError as err:  # 'as' form is valid on Python 2.6+ and Python 3
        # Report the failure and show the offending tile.
        error("Error: %s" % err)
        if err.tile:
            err.tile.display()
def get_list(self, accounts_ids):
    """Return the accounts listed by the JSON accounts service.

    ``accounts_ids`` maps an account id to an object exposing ``label``; it is
    used to recover ids, which the JSON data does not carry.
    Raises ``BrowserPasswordExpired`` when no account is found and the page
    shows the "change your secret code" image.
    """
    accounts = []

    # Read the json data
    json_data = self.browser.readurl('/banque/PA_Autonomy-war/ProxyIAService?cleOutil=IA_SMC_UDC&service=getlstcpt&dashboard=true&refreshSession=true&cre=udc&poka=true')
    json_infos = json.loads(json_data)

    for famille in json_infos['smc']['data']['familleCompte']:
        id_famille = famille['idFamilleCompte']
        for compte in famille['compte']:
            account = Account()
            account.label = u'' + compte['libellePersoProduit']
            account.currency = account.get_currency(compte['devise'])
            # Go through str(): Decimal(float) would expose the float's binary
            # representation error instead of the amount as written in the JSON.
            account.balance = Decimal(str(compte['soldeDispo']))
            account.coming = Decimal(str(compte['soldeAVenir']))
            account.type = self.ACCOUNT_TYPES.get(id_famille, Account.TYPE_UNKNOWN)
            account.id = 0
            account._link_id = 'KEY' + compte['key']

            # IBANs aren't in the JSON.
            # Fast method: get it from the transfer page (matched by label).
            for known_id, known in accounts_ids.items():
                if known.label == account.label:
                    account.id = known_id
            # That doesn't work with LOAN and MARKET accounts, so fall back to
            # the slow method: get it from the transaction page.
            if account.id == 0:
                account.id = self.browser.get_IBAN_from_account(account)

            accounts.append(account)

    if not accounts:
        # Function-call form prints the same text on Python 2 and Python 3.
        print('no accounts')
        # oops, no accounts? check if we have not exhausted the allowed use
        # of this password
        for img in self.document.getroot().cssselect('img[align="middle"]'):
            if img.attrib.get('alt', '') == 'Changez votre code secret':
                raise BrowserPasswordExpired('Your password has expired')

    return accounts
def video_info(url):
    """Fetch info about a video using youtube-dl

    :param url: URL of the web page containing the video
    :rtype: :class:`weboob.capabilities.video.Video`

    Returns ``None`` when youtube-dl exits with an error.
    Raises ``Exception`` when youtube-dl is not found in ``PATH``.
    """
    if not MediaPlayer._find_in_path(os.environ['PATH'], 'youtube-dl'):
        raise Exception('Please install youtube-dl')

    try:
        output = subprocess.check_output(['youtube-dl', '-f', 'best', '-J', url])
    except subprocess.CalledProcessError:
        return
    j = json.loads(output)

    v = BaseVideo(id=url)
    v.title = j.get('title') or NotAvailable
    v.ext = j.get('ext') or NotAvailable
    v.description = j.get('description') or NotAvailable
    v.url = j['url']
    v.duration = j.get('duration') or NotAvailable
    v.author = j.get('uploader') or NotAvailable
    v.rating = j.get('average_rating') or NotAvailable

    if j.get('thumbnail'):
        v.thumbnail = Thumbnail(j['thumbnail'])

    # Use 'or' rather than a .get() default: the default only applies when the
    # key is missing, so a present-but-null 'upload_date' would otherwise hide
    # 'release_date'.
    d = j.get('upload_date') or j.get('release_date')
    if d:
        v.date = parse_date(d)

    return v
def api_request(self, command, **kwargs):
    """Call the API endpoint ``/api/<command>`` and return the decoded JSON answer.

    A ``data`` keyword argument, when given, is sent as the UTF-8 encoded
    request body; the remaining keyword arguments go into the URL.
    Raises ``ValueError`` carrying the raw body when it is not valid JSON.
    """
    data = None
    if 'data' in kwargs:
        data = to_unicode(kwargs.pop('data')).encode('utf-8', 'replace')

    headers = {}
    # Every command except the 'applications...' ones is authenticated.
    if not command.startswith('applications'):
        today = local2utc(datetime.now()).strftime('%Y-%m-%d')
        token = sha256(self.username + self.APITOKEN + today).hexdigest()
        credentials = '%s:%s' % (self.username, self.password)

        headers['Authorization'] = 'Basic %s' % (b64encode(credentials))
        headers['X-Platform'] = 'android'
        headers['X-Client-Version'] = self.APIVERSION
        headers['X-AUM-Token'] = token

    url = self.buildurl(self.absurl('/api/%s' % command), **kwargs)
    if isinstance(url, unicode):
        url = url.encode('utf-8')

    buf = self.openurl(self.request_class(url, data, headers)).read()

    try:
        return json.loads(buf)
    except ValueError:
        # Surface the undecodable body itself as the error message.
        raise ValueError(buf)
def fill_live_video(self, video, json_url):
    """Fill ``video`` (url, ext, date, duration, thumbnail) from the player JSON.

    Returns the same ``video`` object.
    """
    # NOTE(review): the source had lost its indentation; the nesting of the
    # url/ext assignments under the 'VSR' test is reconstructed -- confirm.
    response = self.openurl(json_url)
    player = simplejson.loads(response.read(), self.ENCODING)['videoJsonPlayer']

    quality = None
    if 'VSR' in player:
        streams = player['VSR']
        # Pick the first stream entry that contains the configured quality.
        for item in streams:
            if self.quality in item:
                quality = item
                break

        chosen = streams[quality] if quality else streams[0]
        url = chosen['url']
        ext = chosen['mediaType']

        video.url = u'%s' % url
        video.ext = u'%s' % ext

    if 'VDA' in player:
        # Drop the last six characters before parsing the timestamp.
        date_string = player['VDA'][:-6]
        try:
            video.date = datetime.datetime.strptime(date_string, '%d/%m/%Y %H:%M:%S')
        except TypeError:
            # Fallback through time.strptime when datetime.strptime raises TypeError.
            video.date = datetime.datetime(*(time.strptime(date_string, '%d/%m/%Y %H:%M:%S')[0:6]))

    if 'VDU' in player:
        video.duration = int(player['VDU'])

    if 'IUR' in player['VTU']:
        video.thumbnail = BaseImage(player['VTU']['IUR'])
        video.thumbnail.url = video.thumbnail.id

    return video
def iter_movies(self, pattern):
    """Search movies matching ``pattern`` and yield one ``Movie`` per result.

    Yields nothing when the feed has no 'movie' entry. Only the short
    description and thumbnail are filled in; other fields stay ``NotLoaded``.
    """
    raw = self.readurl(
        'http://api.allocine.fr/rest/v3/search?partner=YW5kcm9pZC12M3M&filter=movie&q=%s&format=json'
        % pattern.encode('utf-8'))
    feed = json.loads(raw)['feed']
    if 'movie' not in feed:
        return

    for entry in feed['movie']:
        # Short description: "title ; year-or-release-date ; actors"
        pieces = []
        if 'title' in entry:
            pieces.append('%s' % entry['title'])
        if 'productionYear' in entry:
            pieces.append('%s' % entry['productionYear'])
        elif 'release' in entry:
            pieces.append('%s' % entry['release']['releaseDate'])
        if 'castingShort' in entry and 'actors' in entry['castingShort']:
            pieces.append('%s' % entry['castingShort']['actors'])
        short_description = u' ; '.join(pieces).strip('; ')

        if 'poster' in entry:
            thumbnail_url = unicode(entry['poster']['href'])
        else:
            thumbnail_url = NotAvailable

        movie = Movie(entry['code'], unicode(entry['originalTitle']))
        movie.other_titles = NotLoaded
        movie.release_date = NotLoaded
        movie.duration = NotLoaded
        movie.short_description = short_description
        movie.pitch = NotLoaded
        movie.country = NotLoaded
        movie.note = NotLoaded
        movie.roles = NotLoaded
        movie.all_release_dates = NotLoaded
        movie.thumbnail_url = thumbnail_url
        yield movie
def set_video_metadata(self, video):
    """Fill title, author, description, thumbnail and duration of ``video``.

    Best effort: any error while fetching or parsing the metadata is ignored
    so that the video can still be downloaded without it.
    """
    # The player html code with all the required information is loaded
    # after the main page using javascript and a special XmlHttpRequest
    # we emulate this behaviour
    from_request = self.group_dict['from']

    query = urllib.urlencode({
        'from_request': from_request,
        'request': '/video/%s?get_video=1' % video.id
    })

    request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
    # This header is mandatory to have the correct answer from dailymotion
    request.add_header('X-Requested-With', 'XMLHttpRequest')

    player_html = self.browser.readurl(request)

    try:
        m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"', player_html)
        flashvars = urlparse.parse_qs(m.group('flashvars'))
        info = json.loads(flashvars['sequence'][0])

        # The video parameters seem to be always located at the same place
        # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
        #   [0]['layerList'][0]['param']['extraParams'])
        #
        # but to be more tolerant to future changes in the structure, we
        # prefer to look for the parameters everywhere in the structure
        def find_video_params(data):
            """Depth-first search for the first truthy ['param']['extraParams']."""
            if isinstance(data, dict):
                if 'param' in data and 'extraParams' in data['param']:
                    return data['param']['extraParams']
                # list(): on Python 3 values() is a view, not a list, and
                # would be rejected by the isinstance check below.
                data = list(data.values())

            if not isinstance(data, list):
                return None

            for item in data:
                ret = find_video_params(item)
                if ret:
                    return ret

            return None

        params = find_video_params(info['sequence'])

        video.title = unicode(params['videoTitle'])
        video.author = unicode(params['videoOwnerLogin'])
        video.description = unicode(params['videoDescription'])
        video.thumbnail = BaseImage(params['videoPreviewURL'])
        video.thumbnail.url = unicode(params['videoPreviewURL'])
        video.duration = datetime.timedelta(seconds=params['mediaDuration'])
    except Exception:
        # If anything goes wrong, we prefer to return normally, this will
        # allow video download to work even if we don't have the metadata.
        # 'Exception' rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        pass
def load_async(self, time):
    """Load the content of the asynchronously filled divs into ``self.doc``.

    Keeps polling ``/AsynchAjax`` for as long as the answer carries a 'time'
    value, sleeping that many milliseconds between polls.
    Raises ``BrowserUnavailable`` once the cumulated wait exceeds 120 seconds.
    """
    total = 0
    while True:
        # load content of loading divs.
        pending = self.doc.xpath('//input[@type="hidden" and starts-with(@id, "asynch")]')
        if not pending:
            break

        params = {}
        for i, hidden in enumerate(pending):
            params['key%s' % i] = hidden.attrib['name']
            params['div%s' % i] = hidden.attrib['value']
        params['time'] = time

        r = self.browser.open('/AsynchAjax', params=params)
        data = json.loads(r.content)

        for d in data['data']:
            div = self.doc.xpath('//div[@id="%s"]' % d['key'])[0]
            html = d['flux']
            div.clear()
            # needed because clear removes also all attributes
            div.attrib['id'] = d['key']
            div.insert(0, etree.fromstring(html, parser=etree.HTMLParser()))

        if 'time' not in data:
            break

        wait = float(data['time'])/1000.0
        self.logger.debug('should wait %f more seconds', wait)
        total += wait
        if total > 120:
            raise BrowserUnavailable('too long time to wait')

        sleep(wait)
def get_video(self, video=None):
    """Fill ``video`` from the page and the embed player, and return it.

    A new ``DailymotionVideo`` is created from ``self.group_dict['id']`` when
    ``video`` is None. Raises ``BrokenPageError`` when the embed page carries
    no usable information or no stream URL.
    """
    if video is None:
        video = DailymotionVideo(self.group_dict['id'])

    select = self.parser.select
    div = select(self.document.getroot(), 'div#content', 1)

    video.title = unicode(select(div, 'span.title', 1).text).strip()
    video.author = unicode(select(div, 'a.name, span.name, a[rel=author]', 1).text).strip()
    try:
        description_html = self.parser.tostring(select(div, 'div#video_description', 1))
        video.description = html2text(description_html).strip() or unicode()
    except BrokenPageError:
        video.description = u''

    embed_page = self.browser.readurl('http://www.dailymotion.com/embed/video/%s' % video.id)

    found = re.search('var info = ({.*?}),[^{"]', embed_page)
    if not found:
        raise BrokenPageError('Unable to find information about video')
    info = json.loads(found.group(1))

    # Stream keys are tried in this order; the first non-empty one is used.
    stream_keys = ['stream_h264_hd1080_url', 'stream_h264_hd_url',
                   'stream_h264_hq_url', 'stream_h264_url',
                   'stream_h264_ld_url']
    max_quality = next((key for key in stream_keys if info.get(key)), None)
    if max_quality is None:
        raise BrokenPageError(u'Unable to extract video URL')
    video.url = info[max_quality]

    video.set_empty_fields(NotAvailable)

    return video
def iter_persons(self, pattern):
    """Search persons matching ``pattern`` and yield one ``Person`` per result.

    Yields nothing when the feed has no 'person' entry. Only the short
    description and thumbnail are filled in; other fields stay ``NotLoaded``.
    """
    raw = self.readurl(
        'http://api.allocine.fr/rest/v3/search?partner=YW5kcm9pZC12M3M&filter=person&q=%s&format=json'
        % pattern.encode('utf-8'))
    feed = json.loads(raw)['feed']
    if 'person' not in feed:
        return

    for entry in feed['person']:
        if 'picture' in entry:
            thumbnail_url = unicode(entry['picture']['href'])
        else:
            thumbnail_url = NotAvailable

        person = Person(entry['code'], unicode(entry['name']))

        # Short description: "(birth date), activity, activity, ..."
        pieces = []
        if 'birthDate' in entry:
            pieces.append('(%s)' % entry['birthDate'])
        if 'activity' in entry:
            for activity in entry['activity']:
                pieces.append('%s' % activity['$'])

        person.real_name = NotLoaded
        person.birth_place = NotLoaded
        person.birth_date = NotLoaded
        person.death_date = NotLoaded
        person.gender = NotLoaded
        person.nationality = NotLoaded
        person.short_biography = NotLoaded
        person.short_description = u', '.join(pieces).strip(', ')
        person.roles = NotLoaded
        person.thumbnail_url = thumbnail_url
        yield person
def obj__formats(self):
    """Return ``{extension: {quality: url}}`` parsed from the ``buildPlayer(...)`` call.

    Returns ``None`` when no player configuration is found in the scripts.
    Raises ``ParseError`` when the configuration reports an error.
    """
    # Raw string: '\(' and '\)' are not valid escape sequences in a plain literal.
    player = Regexp(CleanText('//script'), r'.*buildPlayer\((.*}})\);.*', default=None)(self)
    if not player:
        return None

    info = json.loads(player)
    if info.get('error') is not None:
        raise ParseError(info['error']['title'])

    metadata = info.get('metadata')

    formats = {}
    for quality, media_list in metadata['qualities'].items():
        for media in media_list:
            media_url = media.get('url')
            if not media_url:
                continue

            type_ = media.get('type')
            if type_ == 'application/vnd.lumberjack.manifest':
                continue

            ext = determine_ext(media_url)
            # NOTE(review): a new quality for an already-seen extension replaces
            # that extension's whole mapping; behaviour kept as-is -- confirm
            # whether merging was intended.
            if ext in formats and quality in formats[ext]:
                formats[ext][quality] = media_url
            else:
                formats[ext] = {quality: media_url}

    return formats
def get_history(self):
    """Yield the transactions found in the page's ``ListeMvts_data`` JS array.

    Yields nothing when the array is absent. Transactions without a date, and
    those for which ``self.set_coming`` returns a true value, are skipped.
    """
    txt = self.get_from_js('ListeMvts_data = new Array(', ');')

    if txt is None:
        # Either the page says there are no transactions for this account
        # (this is normal), or there is no history on this account: both
        # cases end the iteration.
        self.get_from_js('js_noMvts = new Ext.Panel(', ')')
        return

    # Escape existing double quotes, then turn the JS single quotes into JSON ones.
    json_text = '[%s]' % txt.replace('"', '\\"').replace("'", '"')

    for line in json.loads(json_text):
        t = Transaction()

        if self.is_coming is not None:
            t.type = t.TYPE_CARD
            date_column = self.COL_DEBIT_DATE
        else:
            date_column = self.COL_DATE
        date = self.parser.strip(line[date_column])
        raw = self.parser.strip(line[self.COL_LABEL])

        t.parse(date, raw)
        t.set_amount(line[self.COL_VALUE])

        if t.date is NotAvailable or self.set_coming(t):
            continue

        yield t
def get_history(self, acc_type):
    """Yield the transactions found in the page's ``ListeMvts_data`` JS array.

    Yields nothing when the array is absent. Transactions for which
    ``self.condition(t, acc_type)`` returns a true value are skipped.
    """
    txt = self.get_from_js('ListeMvts_data = new Array(', ');\n')

    if txt is None:
        # Either the page says there are no transactions for this account
        # (this is normal), or there is no history on this account: both
        # cases end the iteration.
        self.get_from_js('js_noMvts = new Ext.Panel(', ')')
        return

    # Escape existing double quotes, then turn the JS single quotes into JSON ones.
    json_text = '[%s]' % txt.replace('"', '\\"').replace("'", '"')
    strip = self.parser.strip

    for line in json.loads(json_text):
        t = Transaction()

        if acc_type is Account.TYPE_CARD:
            # Cards use the debit date for both the date and the value date.
            date = vdate = strip(line[self.COL_DEBIT_DATE])
        else:
            date = strip(line[self.COL_DATE])
            vdate = strip(line[self.COL_DEBIT_DATE])
        raw = strip(line[self.COL_LABEL])

        t.parse(date, raw, vdate=vdate)
        t.set_amount(line[self.COL_VALUE])

        if not self.condition(t, acc_type):
            yield t
def get_transactions(self):
    """Yield the transactions embedded as JSON in one of the page's scripts.

    The first script containing ``self.JSON_PREFIX`` is used; nothing is
    yielded when no script contains it.
    """
    rows = []
    prefix = self.JSON_PREFIX
    for script in self.doc.xpath('//script'):
        content = script.text
        if content is None:
            continue

        start = content.find(prefix)
        if start < 0:
            continue

        # The JSON runs from just after the prefix up to the first ';' after it.
        end = start + content[start:].find(';')
        rows = json.loads(content[start + len(prefix):end])
        break

    for row in rows:
        t = Transaction(0)
        t.parse(row[self.ROW_DATE], row[self.ROW_TEXT])

        amount = row[self.ROW_DEBIT]
        # A '+' in the amount marks a credit; anything else is a debit.
        if "+" in amount:
            t.set_amount(credit=amount)
        else:
            t.set_amount(debit=amount)
        yield t
def confirm(self):
    """Fill in and submit the 'authentification' form using the virtual keyboard.

    Fetches the keyboard description, downloads the keyboard image, decodes the
    first six characters of the browser password into tile codes, reorders them
    through ``self.strange_map`` and submits the form.
    """
    form = self.get_form(id='authentification')

    url = self.browser.BASEURL + '//sec/vkm/gen_crypto?estSession=0'
    # Use the decoded text: the pattern below is a str pattern, which cannot
    # be matched against bytes on Python 3.
    infos_data = self.browser.open(url).text
    # The answer is JSONP-like: _vkCallback(<payload>); -- keep only the payload.
    # Raw string: '\(' is not a valid escape sequence in a plain literal.
    infos_data = re.match(r'^_vkCallback\((.*)\);$', infos_data).group(1)
    # The payload uses single quotes; swap them so it parses as JSON.
    infos = json.loads(infos_data.replace("'", '"'))
    infos['grid'] = self.decode_grid(infos)

    url = self.browser.BASEURL + '/sec/vkm/gen_ui?modeClavier=0&cryptogramme=' + infos["crypto"]
    # The keyboard image, on the other hand, is binary and stays as bytes.
    content = self.browser.open(url).content
    img = Captcha(BytesIO(content), infos)

    try:
        img.build_tiles()
    except TileError as err:
        # Report the failure and show the offending tile; execution continues.
        error("Error: %s" % err)
        if err.tile:
            err.tile.display()

    pwd = img.get_codes(self.browser.password[:6])
    codes = pwd.split(',')
    newpwd = ','.join(codes[self.strange_map[j]] for j in range(6))

    form['codsec'] = newpwd
    form['cryptocvcs'] = infos["crypto"].encode('iso-8859-1')
    form['vkm_op'] = 'sign'
    form.submit()
def get_video_url(self, format=38):
    """Return ``(url, extension)`` for the best available stream.

    Formats are tried in the order of ``self.AVAILABLE_FORMATS``, starting at
    ``format``. Raises ``BrokenPageError`` when none of them is available.
    """
    marker = "yt.playerConfig = "
    formats = {}
    for script in self.parser.select(self.document.getroot(), 'script'):
        text = script.text
        if not text:
            continue

        pos = text.find(marker)
        if pos < 0:
            continue

        # The configuration is the rest of the line following the marker.
        line_end = pos + text[pos:].find('\n')
        config = json.loads(text[pos + len(marker):line_end].rstrip(';'))

        for part in config['args']['url_encoded_fmt_stream_map'].split(','):
            args = dict(parse_qsl(part))
            formats[int(args['itag'])] = args['url'] + '&signature=' + args['sig']

        # Only the first script carrying the player configuration is used.
        break

    # choose the better format to use.
    first = self.AVAILABLE_FORMATS.index(format)
    for candidate in self.AVAILABLE_FORMATS[first:]:
        if candidate in formats:
            return formats.get(candidate), self.FORMAT_EXTENSIONS.get(candidate, 'flv')

    raise BrokenPageError('Unable to find file URL')
def build_token(self, token):
    """
    Apply the page's extra protection on the token.

    Each time there is a call to SAB (selectActionButton), the token
    available in the form is modified with a key available in JS:

    ipsff(function(){TW().ipthk([12, 25, 17, 5, 23, 26, 15, 30, 6]);});

    Each value of the array is an index for the current token to append the
    char at this position at the end of the token.

    The token is returned unchanged when no script carries such a key.
    """
    key_pattern = re.compile(r'ipthk\(([^\)]+)\)', flags=re.MULTILINE)

    indexes = None
    for script in self.document.xpath('//script'):
        content = script.text
        if content is None:
            continue

        found = key_pattern.search(content)
        if found is not None:
            # No early exit: the key of the last matching script wins.
            indexes = json.loads(found.group(1))

    if indexes is not None:
        for position in indexes:
            token += token[position]

    return token
def iter_persons(self, pattern):
    """Search persons matching ``pattern`` and yield one ``Person`` per result.

    Yields nothing when the request returns ``None`` or when the feed has no
    'person' entry. Only the short description and thumbnail are filled in;
    the other fields are left as ``NotLoaded``.
    """
    query = [
        ('partner', self.PARTNER_KEY),
        ('q', pattern),
        ('format', 'json'),
        ('filter', 'person'),
    ]
    raw = self.__do_request('search', query)
    if raw is None:
        return

    feed = json.loads(raw)['feed']
    if 'person' not in feed:
        return

    for entry in feed['person']:
        if 'picture' in entry:
            thumbnail_url = unicode(entry['picture']['href'])
        else:
            thumbnail_url = NotAvailable

        person = Person(entry['code'], unicode(entry['name']))

        # Short description: "(birth date), activity, activity, ..."
        pieces = []
        if 'birthDate' in entry:
            pieces.append('(%s)' % entry['birthDate'])
        if 'activity' in entry:
            for activity in entry['activity']:
                pieces.append('%s' % activity['$'])

        person.real_name = NotLoaded
        person.birth_place = NotLoaded
        person.birth_date = NotLoaded
        person.death_date = NotLoaded
        person.gender = NotLoaded
        person.nationality = NotLoaded
        person.short_biography = NotLoaded
        person.short_description = u', '.join(pieces).strip(', ')
        person.roles = NotLoaded
        person.thumbnail_url = thumbnail_url
        yield person
def obj__formats(self):
    """Return ``{extension: {quality: url}}`` parsed from the player ``config`` variable.

    Returns ``None`` when no player configuration is found in the scripts.
    Raises ``ParseError`` when the configuration reports an error.
    """
    # Raw string: '\s', '\(' and '\)' are not valid escape sequences in a
    # plain literal.
    player = Regexp(CleanText('//script'),
                    r'.*var config = ({"context".*}}});\s*buildPlayer\(config\);.*',
                    default=None)(self)
    if not player:
        return None

    info = json.loads(player)
    if info.get('error') is not None:
        raise ParseError(info['error']['title'])

    metadata = info.get('metadata')

    formats = {}
    for quality, media_list in metadata['qualities'].items():
        for media in media_list:
            media_url = media.get('url')
            if not media_url:
                continue

            type_ = media.get('type')
            if type_ == 'application/vnd.lumberjack.manifest':
                continue

            ext = determine_ext(media_url)
            # NOTE(review): a new quality for an already-seen extension replaces
            # that extension's whole mapping; behaviour kept as-is -- confirm
            # whether merging was intended.
            if ext in formats and quality in formats[ext]:
                formats[ext][quality] = media_url
            else:
                formats[ext] = {quality: media_url}

    return formats
def parse(self, el):
    """Replace ``self.el`` with the decoded JSON-LD data found in the page.

    The second ``application/ld+json`` script is used, after its CDATA
    wrapper has been removed. The ``el`` argument itself is not read.
    """
    ld_scripts = XPath(u'//script[@type="application/ld+json"]')(self)
    cdata_markers = [('//<![CDATA[ ', ''), (' //]]>', '')]
    clean = CleanText(u'.', replace=cdata_markers)
    self.el = json.loads(clean(ld_scripts[1]))
def iter_movies(self, pattern):
    """Search movies matching ``pattern`` and yield one ``Movie`` per result.

    Results are read from the "title_popular", "title_exact" and
    "title_approx" categories, in that order. Only the title and the short
    description are filled in; the other fields are left as ``NotLoaded``.
    """
    raw = self.readurl("http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s" % pattern.encode("utf-8"))
    jres = json.loads(raw)
    htmlparser = HTMLParser()

    for cat in ["title_popular", "title_exact", "title_approx"]:
        if cat not in jres:
            continue

        for m in jres[cat]:
            tdesc = unicode(m["title_description"])
            if "<a" in tdesc and ">" in tdesc:
                # Keep the text before the first tag plus the text of the link.
                before_tag = tdesc.split("<")[0].strip(", ")
                link_text = tdesc.split(">")[1].split("<")[0]
                short_description = u"%s %s" % (before_tag, link_text)
            else:
                short_description = tdesc.strip(", ")

            movie = Movie(m["id"], htmlparser.unescape(m["title"]))
            movie.other_titles = NotLoaded
            movie.release_date = NotLoaded
            movie.duration = NotLoaded
            movie.short_description = htmlparser.unescape(short_description)
            movie.pitch = NotLoaded
            movie.country = NotLoaded
            movie.note = NotLoaded
            movie.roles = NotLoaded
            movie.all_release_dates = NotLoaded
            movie.thumbnail_url = NotLoaded
            yield movie
def get_list(self, accounts_ids):
    """Return the accounts listed by the JSON accounts service.

    ``accounts_ids`` maps an account id to an object exposing ``label``; it is
    used to recover ids, which the JSON data does not carry.
    Raises ``BrowserPasswordExpired`` when no account is found and the page
    shows the "change your secret code" image.
    """
    accounts = []

    # Read the json data
    json_data = self.browser.readurl('/banque/PA_Autonomy-war/ProxyIAService?cleOutil=IA_SMC_UDC&service=getlstcpt&dashboard=true&refreshSession=true&cre=udc&poka=true')
    json_infos = json.loads(json_data)

    for famille in json_infos['smc']['data']['familleCompte']:
        id_famille = famille['idFamilleCompte']
        for compte in famille['compte']:
            account = Account()
            account.label = u'' + compte['libellePersoProduit']
            account.currency = account.get_currency(compte['devise'])
            # str() first, so Decimal sees the amount as written, not a float.
            account.balance = Decimal(str(compte['soldeDispo']))
            account.coming = Decimal(str(compte['soldeAVenir']))
            account.type = self.ACCOUNT_TYPES.get(id_famille, Account.TYPE_UNKNOWN)
            account.id = 0
            account._link_id = 'KEY' + compte['key']

            # IBANs aren't in the JSON.
            # Fast method: get it from the transfer page (matched by label).
            for known_id, known in accounts_ids.items():
                if known.label == account.label:
                    account.id = known_id
            # That doesn't work with LOAN and MARKET accounts, so fall back to
            # the slow method: get it from the transaction page.
            if account.id == 0:
                account.id = self.browser.get_IBAN_from_account(account)

            accounts.append(account)

    if not accounts:
        # Function-call form prints the same text on Python 2 and Python 3;
        # the print statement is a syntax error on Python 3.
        print('no accounts')
        # oops, no accounts? check if we have not exhausted the allowed use
        # of this password
        for img in self.document.getroot().cssselect('img[align="middle"]'):
            if img.attrib.get('alt', '') == 'Changez votre code secret':
                raise BrowserPasswordExpired('Your password has expired')

    return accounts
def iter_movies(self, pattern):
    """Search movies matching ``pattern`` and yield one ``Movie`` per result.

    Results are read from the 'title_popular', 'title_exact' and
    'title_approx' categories, in that order. Only the title and the short
    description are filled in; the other fields are left as ``NotLoaded``.
    """
    raw = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s' % pattern.encode('utf-8'))
    jres = json.loads(raw)
    htmlparser = HTMLParser()

    for cat in ['title_popular', 'title_exact', 'title_approx']:
        if cat not in jres:
            continue

        for m in jres[cat]:
            tdesc = unicode(m['title_description'])
            if '<a' in tdesc and '>' in tdesc:
                # Keep the text before the first tag plus the text of the link.
                before_tag = tdesc.split('<')[0].strip(', ')
                link_text = tdesc.split('>')[1].split('<')[0]
                short_description = u'%s %s' % (before_tag, link_text)
            else:
                short_description = tdesc.strip(', ')

            movie = Movie(m['id'], htmlparser.unescape(m['title']))
            movie.other_titles = NotLoaded
            movie.release_date = NotLoaded
            movie.duration = NotLoaded
            movie.short_description = htmlparser.unescape(short_description)
            movie.pitch = NotLoaded
            movie.country = NotLoaded
            movie.note = NotLoaded
            movie.roles = NotLoaded
            movie.all_release_dates = NotLoaded
            movie.thumbnail_url = NotLoaded
            yield movie
def login(self, login, password):
    """Fetch the virtual-keyboard cryptogram and image, then build its tiles.

    The visible body only prepares the keyboard: it requests the
    ``gen_crypto`` callback (sending the login page as Referer), parses its
    payload, downloads the ``gen_ui`` image and builds a ``Captcha`` from it.
    ``login`` and ``password`` are not used in this body.

    :param login: user identifier (unused here)
    :param password: user password (unused here)
    """
    DOMAIN_LOGIN = self.browser.DOMAIN_LOGIN
    DOMAIN = self.browser.DOMAIN
    url_login = '******' + DOMAIN_LOGIN + '/index.html'

    base_url = 'https://' + DOMAIN
    url = base_url + '//sec/vk/gen_crypto?estSession=0'
    headers = {'Referer': url_login}
    request = self.browser.request_class(url, None, headers)
    infos_data = self.browser.readurl(request)

    # The reply looks like _vkCallback(...); keep only the argument.
    # Raw string so the backslash escapes belong to the regex, not to Python.
    infos_data = re.match(r'^_vkCallback\((.*)\);$', infos_data).group(1)
    # The payload uses single quotes; swap them so it parses as JSON.
    infos = json.loads(infos_data.replace("'", '"'))

    url = base_url + '//sec/vk/gen_ui?modeClavier=0&cryptogramme=' + infos["crypto"]
    img = Captcha(self.browser.openurl(url), infos)

    try:
        img.build_tiles()
    except TileError as err:  # 'as' form parses on Python 2.6+ and Python 3
        error("Error: %s" % err)
        if err.tile:
            err.tile.display()
def login(self, login, password):
    """Solve the virtual keyboard and submit the login form.

    The cryptogram is read from the ``gen_crypto`` callback, the keyboard
    image from ``gen_ui``; the first six characters of ``password`` are
    translated to keyboard codes and posted with ``login`` to
    ``/authent.html``.

    :param login: value sent as ``user_id``
    :param password: password; only ``password[:6]`` is used
    """
    callback = self.browser.open('/sec/vk/gen_crypto?estSession=0').text
    payload = re.match('^_vkCallback\((.*)\);$', callback).group(1)
    # The payload uses single quotes; swap them so it parses as JSON.
    infos = json.loads(payload.replace("'", '"'))

    keyboard_url = '/sec/vk/gen_ui?modeClavier=0&cryptogramme=' + infos["crypto"]
    keyboard_image = BytesIO(self.browser.open(keyboard_url).content)
    img = Captcha(keyboard_image, infos)

    try:
        img.build_tiles()
    except TileError as err:
        error("Error: %s" % err)
        if err.tile:
            err.tile.display()

    form = self.get_form(name=self.browser.LOGIN_FORM)
    form['user_id'] = login
    form['codsec'] = img.get_codes(password[:6])
    form['cryptocvcs'] = infos['crypto']
    form['vk_op'] = 'auth'
    form.url = '/authent.html'
    # The submit button must not be posted; it may be absent from the form.
    try:
        form.pop('button')
    except KeyError:
        pass
    form.submit()
def api_request(self, command, **kwargs):
    """Call ``/api/<command>`` and return the decoded JSON reply.

    :param command: API path below ``/api/``; commands starting with
        'applications' are sent without the authentication headers
    :param kwargs: query parameters for the URL; a ``data`` entry, if given,
        is removed from them and sent as the UTF-8 encoded request body
    :return: the object decoded from the JSON response
    :raises ValueError: with the raw response as argument when the reply is
        not valid JSON
    """
    data = None
    if 'data' in kwargs:
        data = to_unicode(kwargs.pop('data')).encode('utf-8', 'replace')

    headers = {}
    if not command.startswith('applications'):
        # The token is derived from the username, the API token and the
        # current UTC date.
        today = local2utc(datetime.now()).strftime('%Y-%m-%d')
        token = sha256(self.username + self.APITOKEN + today).hexdigest()
        credentials = b64encode('%s:%s' % (self.username, self.password))
        headers['Authorization'] = 'Basic %s' % (credentials)
        headers['X-Platform'] = 'android'
        headers['X-Client-Version'] = self.APIVERSION
        headers['X-AUM-Token'] = token

    url = self.buildurl(self.absurl('/api/%s' % command), **kwargs)
    if isinstance(url, unicode):
        url = url.encode('utf-8')

    response = self.openurl(self.request_class(url, data, headers))
    buf = response.read()
    try:
        return json.loads(buf)
    except ValueError:
        # Surface the raw body so the caller can see what came back.
        raise ValueError(buf)
def video_info(url):
    """Fetch info about a video using youtube-dl

    :param url: URL of the web page containing the video
    :rtype: :class:`weboob.capabilities.video.Video`
    :return: the video, or None when the youtube-dl process exits with an
        error
    :raises Exception: when ``MediaPlayer._find_in_path`` does not find
        youtube-dl in ``PATH``
    """
    if not MediaPlayer._find_in_path(os.environ['PATH'], 'youtube-dl'):
        raise Exception('Please install youtube-dl')

    try:
        j = json.loads(subprocess.check_output(['youtube-dl', '-f', 'best', '-J', url]))
    except subprocess.CalledProcessError:
        return

    v = BaseVideo(id=url)
    v.title = j.get('title') or NotAvailable
    v.ext = j.get('ext') or NotAvailable
    v.description = j.get('description') or NotAvailable
    v.url = j['url']
    v.duration = j.get('duration') or NotAvailable
    v.author = j.get('uploader') or NotAvailable
    v.rating = j.get('average_rating') or NotAvailable

    if j.get('thumbnail'):
        v.thumbnail = Thumbnail(j['thumbnail'])

    # Use 'or' so that a present-but-null upload_date still falls back to
    # release_date; dict.get's default only applies when the key is absent.
    d = j.get('upload_date') or j.get('release_date')
    if d:
        v.date = parse_date(d)
    return v
def api0_request(self, command, action, parameter='', data=None, nologin=False):
    """Send a request to the legacy api.php endpoint and return its JSON.

    :param command: value of the ``C`` query parameter
    :param action: value of the ``A`` query parameter
    :param parameter: value of the ``P`` query parameter
    :param data: request body; a list, tuple or dict is URL-encoded, a
        unicode string is UTF-8 encoded, None becomes an empty body
    :param nologin: when true, an authentication error code is not retried
        after a new login
    :return: the decoded JSON object
    :raises ValueError: with the raw response when no JSON can be decoded
    :raises BrowserIncorrectPassword: on an authentication error code when
        ``nologin`` is true
    :raises AuMException: on any other error code
    """
    if data is None:
        # Always do POST requests.
        data = ''
    elif isinstance(data, (list, tuple, dict)):
        data = urlencode(data)
    elif isinstance(data, unicode):
        data = data.encode('utf-8')

    url = self.buildurl('http://api.adopteunmec.com/api.php', S=self.APIKEY, C=command, A=action, P=parameter, O='json')
    buf = self.openurl(url, data).read()

    # Decode from the first '{' onwards, ignoring whatever precedes it.
    try:
        r = json.loads(buf[buf.find('{'):])
    except ValueError:
        raise ValueError(buf)

    failed = 'errors' in r and r['errors'] != '0' and len(r['errors']) > 0
    if not failed:
        return r

    code = r['errors'][0]
    if code not in (u'0.0.2', u'1.1.1', u'1.1.2'):
        raise AuMException(code)
    if nologin:
        raise BrowserIncorrectPassword(AuMException.ERRORS[code])

    # These codes trigger a new login followed by a single retry.
    self.login()
    return self.api0_request(command, action, parameter, data, nologin=True)
def get_video_by_quality(self, url, quality):
    """Download ``<url>/<quality>.json`` and return the decoded JSON.

    :param url: base URL of the video
    :param quality: quality name used as the JSON file name
    :return: the object decoded by simplejson using ``self.ENCODING``
    """
    json_url = url + '/' + quality + '.json'
    payload = self.openurl(json_url).read()
    return simplejson.loads(payload, self.ENCODING)
def get_history(self):
    """Yield the transactions found in the page's ``ListeMvts_data`` array.

    Nothing is yielded when the array is missing from the page. Rows whose
    parsed date is ``NotAvailable``, or for which ``self.set_coming``
    returns a true value, are skipped.
    """
    txt = self.get_from_js('ListeMvts_data = new Array(', ');')
    if txt is None:
        # Either the page states there are no transactions for this account
        # (which is normal) or there is no history at all; both cases end
        # the iteration.
        self.get_from_js('js_noMvts = new Ext.Panel(', ')')
        return

    # Escape the double quotes, then turn the single-quoted JS strings into
    # JSON strings.
    rows = json.loads('[%s]' % txt.replace('"', '\\"').replace("'", '"'))
    for row in rows:
        t = Transaction(row[self.COL_ID])

        if self.is_coming is None:
            date_column = self.COL_DATE
        else:
            t.type = t.TYPE_CARD
            date_column = self.COL_DEBIT_DATE

        date = self.parser.strip(row[date_column])
        raw = self.parser.strip(row[self.COL_LABEL])

        t.parse(date, raw)
        t.set_amount(row[self.COL_VALUE])

        if t.date is NotAvailable or self.set_coming(t):
            continue
        yield t
def login(self, login, password):
    """Solve the virtual keyboard and submit the login form.

    The cryptogram is read from the ``gen_crypto`` callback, the keyboard
    image from ``gen_ui``; the first six characters of ``password`` are
    translated to keyboard codes and posted with ``login`` to
    ``/authent.html``.

    :param login: value sent as ``user_id``
    :param password: password; only ``password[:6]`` is used
    """
    # Read the callback as text: it is matched against a str pattern and
    # passed to str.replace below, which fails on the raw bytes of .content
    # under Python 3.
    infos_data = self.browser.open('/sec/vk/gen_crypto?estSession=0').text
    # The reply looks like _vkCallback(...); keep only the argument.
    infos_data = re.match(r'^_vkCallback\((.*)\);$', infos_data).group(1)
    # The payload uses single quotes; swap them so it parses as JSON.
    infos = json.loads(infos_data.replace("'", '"'))

    url = '/sec/vk/gen_ui?modeClavier=0&cryptogramme=' + infos["crypto"]
    # The image, on the other hand, is binary and is read as bytes.
    img = Captcha(BytesIO(self.browser.open(url).content), infos)

    try:
        img.build_tiles()
    except TileError as err:
        error("Error: %s" % err)
        if err.tile:
            err.tile.display()

    form = self.get_form(name=self.browser.LOGIN_FORM)
    form['user_id'] = login
    form['codsec'] = img.get_codes(password[:6])
    form['cryptocvcs'] = infos['crypto']
    form['vk_op'] = 'auth'
    form.url = '/authent.html'
    # The submit button must not be posted; it may be absent from the form.
    try:
        form.pop('button')
    except KeyError:
        pass
    form.submit()
def get_project(self, project_name):
    """Return the project, completing it from the page's filters if needed.

    The parent implementation is used as is when it already provides
    statuses, or when the ``availableFilters`` JavaScript variable cannot be
    found. Otherwise members, categories, versions and statuses are rebuilt
    from that variable.

    :param project_name: name passed to the parent ``get_project``
    :return: the project dictionary
    """
    project = super(IssuesPage, self).get_project(project_name)
    if len(project['statuses']) > 0:
        return project

    raw_filters = self.get_from_js('var availableFilters = ', ';')
    if raw_filters is None:
        return project
    filters = json.loads(raw_filters)

    def get_values(key):
        # Each entry of 'values' is a pair; only pairs whose second element
        # is all digits are kept, and they are returned swapped.
        if key not in filters:
            return []
        return [(value, label)
                for label, value in filters[key]['values']
                if value.isdigit()]

    project['members'] = get_values('assigned_to_id')
    project['categories'] = get_values('category_id')
    project['versions'] = get_values('fixed_version_id')
    project['statuses'] = get_values('status_id')
    return project
def get_location(self, ipaddr):
    """Look up ``ipaddr`` on ip-api.com and return an ``IpLocation``.

    :param ipaddr: address appended to the ip-api.com JSON URL
    :return: an ``IpLocation`` with city, region, zipcode, country, latitude
        (``lt``), longitude (``lg``) and, when provided, ISP
    :raises Exception: when the service answers with status "fail"
    """
    response = self.browser.location(u'http://ip-api.com/json/%s' % ipaddr)
    jres = json.loads(response.text)

    if "status" in jres and jres["status"] == "fail":
        raise Exception("IPAPI failure : %s" % jres["message"])

    iploc = IpLocation(ipaddr)
    iploc.city = u'%s' % jres['city']
    iploc.region = u'%s' % jres['regionName']
    iploc.zipcode = u'%s' % jres['zip']
    iploc.country = u'%s' % jres['country']
    # Empty coordinates are replaced by 0.0.
    iploc.lt = float(jres['lat']) if jres['lat'] != '' else 0.0
    iploc.lg = float(jres['lon']) if jres['lon'] != '' else 0.0
    if 'isp' in jres:
        iploc.isp = u'%s' % jres['isp']
    return iploc
def get_location(self, ipaddr):
    """Look up ``ipaddr`` on ip-api.com and return an ``IpLocation``.

    :param ipaddr: address appended to the ip-api.com JSON URL
    :return: an ``IpLocation`` with city, region, zipcode, country, latitude
        (``lt``), longitude (``lg``) and, when provided, ISP
    :raises Exception: when the service answers with status "fail"
    """
    # Interpolate the text value directly: encoding it first puts the repr
    # of a bytes object (b'...') into the URL on Python 3.
    res = self.browser.location(u'http://ip-api.com/json/%s' % ipaddr)
    jres = json.loads(res.text)

    if "status" in jres and jres["status"] == "fail":
        raise Exception("IPAPI failure : %s" % jres["message"])

    iploc = IpLocation(ipaddr)
    iploc.city = u'%s' % jres['city']
    iploc.region = u'%s' % jres['regionName']
    iploc.zipcode = u'%s' % jres['zip']
    iploc.country = u'%s' % jres['country']
    # Empty coordinates are replaced by 0.0.
    if jres['lat'] != '':
        iploc.lt = float(jres['lat'])
    else:
        iploc.lt = 0.0
    if jres['lon'] != '':
        iploc.lg = float(jres['lon'])
    else:
        iploc.lg = 0.0
    if 'isp' in jres:
        iploc.isp = u'%s' % jres['isp']
    return iploc
def iter_persons(self, pattern):
    """Search persons matching *pattern* and yield a ``Person`` per result.

    Nothing is yielded when the request returns None or when the feed has
    no 'person' entry. Only the id, name, short description and thumbnail
    URL are filled in; the other fields are left as ``NotLoaded``.

    :param pattern: search text; it is UTF-8 encoded for the ``q`` parameter
    """
    params = [
        ('partner', self.PARTNER_KEY),
        ('q', pattern.encode('utf-8')),
        ('format', 'json'),
        ('filter', 'person'),
    ]
    res = self.__do_request('search', params)
    if res is None:
        return

    feed = json.loads(res)['feed']
    if 'person' not in feed:
        return

    for entry in feed['person']:
        if 'picture' in entry:
            thumbnail_url = unicode(entry['picture']['href'])
        else:
            thumbnail_url = NotAvailable

        person = Person(entry['code'], unicode(entry['name']))

        # Short description: "(birth date), activity, activity, ..."
        desc = u''
        if 'birthDate' in entry:
            desc += '(%s), ' % entry['birthDate']
        for activity in entry.get('activity', ()) if 'activity' in entry else ():
            desc += '%s, ' % activity['$']

        for field in ('real_name', 'birth_place', 'birth_date', 'death_date',
                      'gender', 'nationality', 'short_biography'):
            setattr(person, field, NotLoaded)
        person.short_description = desc.strip(', ')
        person.roles = NotLoaded
        person.thumbnail_url = thumbnail_url
        yield person
def parse(self, el):
    """Replace ``self.el`` with the page's decoded JSON-LD data.

    The first ``<script type="application/ld+json">`` found is cleaned of
    its CDATA markers and decoded as JSON.

    :param el: not used by this body
    """
    ld_scripts = XPath(u'//script[@type="application/ld+json"]')(self)
    strip_cdata = CleanText(u'.', replace=[('//<![CDATA[ ', ''), (' //]]>', '')])
    self.el = json.loads(strip_cdata(ld_scripts[0]))
def get_transactions(self):
    """Yield the transactions stored in the page's ``jsonData`` variable.

    The first script containing 'var jsonData =' is used; the value is the
    text between that marker and the next ';'. Rows whose date cell is
    'En attente de comptabilisation' are logged and skipped. Nothing is
    yielded when no script declares the variable.
    """
    marker = 'var jsonData ='
    rows = []
    for script in self.doc.xpath('//script'):
        content = script.text
        if content is None:
            continue

        start = content.find(marker)
        if start < 0:
            continue

        # The value ends at the first ';' found at or after the marker.
        end = start + content[start:].find(';')
        rows = json.loads(content[start + len(marker):end])
        break

    for row in rows:
        if row[self.ROW_DATE] == 'En attente de comptabilisation':
            self.logger.debug('skipping transaction without a date: %r', row[self.ROW_TEXT])
            continue

        t = Transaction()
        t.parse(row[self.ROW_DATE], row[self.ROW_TEXT])
        t.set_amount(row[self.ROW_CREDIT], row[self.ROW_DEBIT])
        yield t
def get_list(self):
    """Build the account list from the page's ``_data`` JavaScript array.

    Rows labelled 'ASSURANCE VIE-BON CAPI-SCPI-DIVERS *' are skipped. Rows
    whose id contains '_CarteVisa' are not returned as accounts: their
    arguments are attached to the previously listed account and their
    balance is added to that account's ``coming``.

    :return: list of ``Account`` objects
    :raises BrokenPageError: when the array cannot be found in the scripts
    """
    accounts = []

    txt = self.get_from_js('_data = new Array(', ');', is_list=True)
    if txt is None:
        raise BrokenPageError('Unable to find accounts list in scripts')

    # Turn the single-quoted JS strings into JSON strings.
    rows = json.loads('[%s]' % txt.replace("'", '"'))

    for row in rows:
        account = Account()
        account.id = row[self.COL_ID].replace(' ', '')
        id_parts = account.id.split('_')
        account._acc_nb = id_parts[0] if len(id_parts) > 1 else None

        # The label cell is an HTML fragment: parse it and keep the cleaned
        # text of its libelleCompteTDB div.
        encoding = self.browser.ENCODING
        fp = StringIO(unicode(row[self.COL_LABEL]).encode(encoding))
        label_div = self.parser.parse(fp, self.browser.ENCODING).xpath('//div[@class="libelleCompteTDB"]')[0]
        account.label = self.parser.tocleanstring(label_div)

        # This account can be multiple life insurance accounts
        if account.label == 'ASSURANCE VIE-BON CAPI-SCPI-DIVERS *':
            continue

        account.balance = Decimal(FrenchTransaction.clean_amount(row[self.COL_BALANCE]))
        account.currency = account.get_currency(row[self.COL_BALANCE])
        account.type = self.get_account_type(account.label)

        if row[self.COL_HISTORY] == 'true':
            account._inv = False
            account._link = self.get_history_link()
            account._args = {
                '_eventId': 'clicDetailCompte',
                '_ipc_eventValue': '',
                '_ipc_fireEvent': '',
                'deviseAffichee': 'DEVISE',
                'execution': self.get_execution(),
                'idCompteClique': row[self.COL_ID],
            }
        else:
            account._inv = True
            account._args = {
                '_ipc_eventValue': row[self.COL_ID],
                '_ipc_fireEvent': row[self.COL_FIRE_EVENT],
            }
            account._link = self.document.xpath('//form[@name="changePageForm"]')[0].attrib['action']

        if account.id.find('_CarteVisa') >= 0:
            # Card row: fold it into the account listed just before it.
            owner = accounts[-1]
            owner._card_ids.append(account._args)
            if not owner.coming:
                owner.coming = Decimal('0.0')
            owner.coming += account.balance
            continue

        account._card_ids = []
        accounts.append(account)

    return accounts
def post_image(self, name, contents, time):
    """Upload an image to unsee.cc and return its identifier.

    :param name: file name; '-' is sent when it is empty or None
    :param contents: image data for the multipart file field
    :param time: value of the ``time`` field (time='first' for one-shot view)
    :return: ``{'id': <hash from the JSON reply>}``
    """
    upload = FileField(name or '-', contents)
    fields = [('time', time), ('image[]', upload)]
    request = self._multipart('https://unsee.cc/upload/', fields)
    reply = json.loads(self.readurl(request))
    return {'id': reply['hash']}
def _get_dict(self):
    """Return the cipher data dictionary embedded in the page.

    The JSON text of the ``cipherdata`` div is decoded; a list (zerobin)
    yields its first element, anything else (privatebin) is returned as is.
    """
    payload = json.loads(CleanText('//div[@id="cipherdata"]')(self.doc))
    if not isinstance(payload, list):
        # privatebin
        return payload
    # zerobin
    return payload[0]
def on_load(self):
    """Fetch the validation step and remember the password form id.

    Opens ``self.request_url`` and stores, in ``self.form_id``, the id of
    the first PASSWORD_LOOKUP entry of the first validation unit.

    :raises LoggedOut: when the browser's ``no_login`` flag is not set
    """
    if not self.browser.no_login:
        raise LoggedOut()

    response = self.browser.open(self.request_url)
    validation_units = json.loads(response.content)['step']['validationUnits']
    password_lookup = validation_units[0]['PASSWORD_LOOKUP']
    self.form_id = password_lookup[0]['id']
def decode_paste(self, key):
    """Decrypt the paste embedded in the page and return it as text.

    :param key: key handed to ``decrypt`` together with the cipher data
    :return: the base64-decoded, UTF-8 decoded plaintext
    """
    cipher = json.loads(CleanText('//pre[@id="paste-content"]')(self.doc))
    for field in ('iv', 'ct', 'salt'):
        cipher[field] = fix_base64(cipher[field])

    decrypted = decrypt(key, cipher)
    # 0bin is supposed to use LZW, but its JavaScript implementation does not
    # actually compress anything, so plain base64 decoding is enough here.
    return b64decode(decrypted).decode('utf-8')
def API_post(self, data):
    """
    Submit a POST request to the website
    The JSON data is parsed and returned as a dictionary

    ``data`` is modified in place: its 'format' entry is set to 'json'
    before it is URL-encoded and sent to ``self.apiurl``. The decoded reply
    is passed to ``self.check_result`` before being returned.
    """
    data['format'] = 'json'
    body = urllib.urlencode(data)
    reply = self.readurl(self.apiurl, body)
    result = simplejson.loads(reply, 'utf-8')
    self.check_result(result)
    return result