class ParolesmaniaBrowser(PagesBrowser):
    """Browser for parolesmania.com lyrics search and retrieval.

    Lyrics ids accepted by :meth:`get_lyrics` have the form
    ``<artistid>|<songid>``.
    """

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.parolesmania.com/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    searchSong = URL(r'recherche.php\?c=title&k=(?P<pattern>[^/]*).*',
                     SearchSongPage)
    searchArtist = URL(r'recherche.php\?c=artist&k=(?P<pattern>[^/]*).*',
                       SearchArtistPage)
    songLyrics = URL(r'paroles_(?P<artistid>[^/]*)/paroles_(?P<songid>[^/]*)\.html',
                     LyricsPage)
    artistSongs = URL(r'paroles_(?P<artistid>[^/]*)\.html',
                      ArtistSongsPage)

    def iter_lyrics(self, criteria, pattern):
        """Search lyrics matching ``pattern``.

        :param criteria: ``'artist'`` or ``'song'``
        :param pattern: text to search for
        :return: an iterable of what the result pages' ``iter_lyrics()``
                 produce; ``None`` for any other ``criteria``
        """
        if criteria == 'artist':
            artist_ids = self.searchArtist.go(pattern=pattern).get_artist_ids()
            it = []
            # we just take the 3 first artists to avoid too many page loadings
            for aid in artist_ids[:3]:
                it = itertools.chain(
                    it, self.artistSongs.go(artistid=aid).iter_lyrics())
            return it
        elif criteria == 'song':
            return self.searchSong.go(pattern=pattern).iter_lyrics()

    def get_lyrics(self, id):
        """Return the lyrics for ``id`` (``artistid|songid``).

        :return: result of the lyrics page's ``get_lyrics()``, or ``None``
                 when the page request raises ``BrowserHTTPNotFound``
        """
        ids = id.split('|')
        try:
            self.songLyrics.go(artistid=ids[0], songid=ids[1])
            songlyrics = self.page.get_lyrics()
            return songlyrics
        except BrowserHTTPNotFound:
            return None
class LyricsdotcomBrowser(PagesBrowser):
    """Browser for lyrics.com search and lyrics pages."""

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.lyrics.com'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    search = URL(r'/serp.php\?st=(?P<pattern>.*)&qtype=(?P<criteria>1|2)',
                 SearchPage)
    songLyrics = URL(r'/lyric/(?P<id>\d*)', LyricsPage)
    artistsong = URL(r'/artist/(?P<id>.*)', ArtistPages)

    def iter_lyrics(self, criteria, pattern):
        """Yield search results for ``pattern``.

        :param criteria: ``'song'`` (query type 1) or ``'artist'``
                         (query type 2); anything else yields nothing
        :param pattern: text to search for
        """
        if criteria == 'song':
            self.search.go(pattern=pattern, criteria=1)
            assert self.search.is_here()
            for song in self.page.iter_lyrics():
                yield song
        elif criteria == 'artist':
            self.search.go(pattern=pattern, criteria=2)
            assert self.search.is_here()
            # One extra page load per artist found by the search.
            for artist in self.page.iter_artists():
                for song in self.artistsong.go(id=artist.id).iter_lyrics():
                    yield song

    def get_lyrics(self, id):
        """Open the lyrics page for ``id`` and return its ``get_lyrics()``."""
        return self.songLyrics.go(id=id).get_lyrics()
class BECMBrowser(AbstractBrowser):
    """BECM browser built on top of the 'creditmutuel' parent module."""

    PROFILE = Wget()
    TIMEOUT = 30
    BASEURL = 'https://www.becm.fr'
    PARENT = 'creditmutuel'

    login = URL('/fr/authentification.html', LoginPage)
    # Raw string so that the regex escape is not parsed as an (invalid)
    # string escape sequence.
    advisor = URL(r'/fr/banques/Details.aspx\?banque=.*', AdvisorPage)

    @need_login
    def get_advisor(self):
        """Return an iterator over the advisor, if one could be found.

        The lookup differs depending on ``self.is_new_website``.

        :return: iterator yielding one advisor, or an empty iterator
        """
        advisor = None
        if not self.is_new_website:
            self.accounts.stay_or_go(subbank=self.currentSubBank)
            if self.page.get_advisor_link():
                advisor = self.page.get_advisor()
                # Follow the link and let the target page complete the object.
                self.location(
                    self.page.get_advisor_link()).page.update_advisor(advisor)
        else:
            advisor = self.new_accounts.stay_or_go(
                subbank=self.currentSubBank).get_advisor()
            link = self.page.get_agency()
            if link:
                self.location(link)
                self.page.update_advisor(advisor)
        return iter([advisor]) if advisor else iter([])
class T411Browser(LoginBrowser):
    """Browser for t411 torrent search; requires a logged-in session."""

    PROFILE = Wget()
    TIMEOUT = 30

    BASEURL = 'https://www.t411.ai/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    home = URL(r'$', HomePage)
    search = URL(r'torrents/search/\?search=(?P<pattern>.*)&order=seeders&type=desc',
                 SearchPage)
    # Order matters here: 'torrents/[^&]*' would match '/torrents/download/\?id...' and
    # TorrentPage would crash on the bencode data, so DownloadPage must be listed before
    # TorrentPage
    download = URL(r'/torrents/download/\?id=(?P<id>.*)', DownloadPage)
    torrent = URL(r'/torrents/details/\?id=(?P<id>.*)&r=1',
                  r'torrents/[^&]*',
                  TorrentPage)

    def do_login(self):
        """Log in from the home page.

        :raises BrowserIncorrectPassword: if the page still reports not
            being logged after submitting the credentials
        """
        self.home.go()
        if not self.page.logged:
            self.page.login(self.username, self.password)
        if not self.page.logged:
            raise BrowserIncorrectPassword()

    @need_login
    def iter_torrents(self, pattern):
        """Return the search page's ``iter_torrents()`` for ``pattern``."""
        return self.search.go(pattern=pattern).iter_torrents()

    @need_login
    def get_torrent(self, fullid, torrent=None):
        """Fetch the torrent identified by ``fullid``.

        :param torrent: unused; kept for interface compatibility
        :return: result of the page's ``get_torrent()``, or ``None`` when
                 the request raises ``BrowserHTTPNotFound``
        """
        try:
            self.torrent.go(id=fullid)
            torrent = self.page.get_torrent()
            return torrent
        except BrowserHTTPNotFound:
            return None
class SGEnterpriseBrowser(SGPEBrowser):
    """Societe Generale "entreprises" browser using the JSON endpoints."""

    BASEURL = 'https://entreprises.secure.societegenerale.fr'
    LOGIN_FORM = 'auth'
    MENUID = 'BANREL'
    CERTHASH = '2231d5ddb97d2950d5e6fc4d986c23be4cd231c31ad530942343a8fdcc44bb99'

    accounts = URL('/icd/syd-front/data/syd-comptes-accederDepuisMenu.json',
                   AccountsJsonPage)
    balances = URL('/icd/syd-front/data/syd-comptes-chargerSoldes.json',
                   BalancesJsonPage)
    history = URL('/icd/syd-front/data/syd-comptes-chargerReleve.json',
                  '/icd/syd-front/data/syd-intraday-chargerDetail.json',
                  HistoryJsonPage)
    history_next = URL('/icd/syd-front/data/syd-comptes-chargerProchainLotEcriture.json',
                       HistoryJsonPage)

    profile = URL('/gae/afficherModificationMesDonnees.html', ProfileEntPage)

    def go_accounts(self):
        """Open the accounts endpoint."""
        self.accounts.go()

    @need_login
    def get_accounts_list(self):
        """Yield the accounts once the balances page has populated them."""
        found = list(self.accounts.stay_or_go().iter_accounts())
        balances_page = self.balances.go()
        for populated in balances_page.populate_balances(found):
            yield populated

    @need_login
    def iter_history(self, account):
        """Return an iterator over the transactions of ``account``.

        A first statement request provides the value passed to the second
        one; the intraday detail entries are appended after it.
        """
        account_id = account._id
        value = self.history.go(data={'cl500_compte': account_id,
                                      'cl200_typeReleve': 'valeur'}).get_value()

        statement_page = self.history.go(data={'cl500_compte': account_id,
                                               'cl200_typeReleve': value})
        collected = list(statement_page.iter_history(value=value))

        intraday = self.location('/icd/syd-front/data/syd-intraday-chargerDetail.json',
                                 data={'cl500_compte': account_id})
        collected.extend(intraday.page.iter_history())
        return iter(collected)
class IlmatieteenlaitosBrowser(PagesBrowser):
    """Browser for ilmatieteenlaitos.fi city search, forecast and observations."""

    BASEURL = 'http://ilmatieteenlaitos.fi'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    cities = URL(
        r'/etusivu\?p_p_id=locationmenuportlet_WAR_fmiwwwweatherportlets&p_p_lifecycle=2&p_p_state=normal&'
        r'p_p_mode=view&p_p_cacheability=cacheLevelFull&term=(?P<pattern>.*)',
        SearchCitiesPage)
    weather_query = URL(
        r'/paikallissaa\?p_p_id=locationmenuportlet_WAR_fmiwwwweatherportlets&p_p_lifecycle=1&'
        r'p_p_state=normal&p_p_mode=view&_locationmenuportlet_WAR_fmiwwwweatherportlets_action='
        r'changelocation')
    weather = URL(r'/saa/(?P<city_url>.*)', WeatherPage)
    observations = URL(r'/observation-data\?station=(?P<station_id>.*)',
                       ObservationsPage)

    def iter_city_search(self, pattern):
        """Return the search page's ``iter_cities()`` for ``pattern``."""
        return self.cities.go(pattern=pattern).iter_cities()

    def iter_forecast(self, city):
        """Post ``city.name`` to the location query and return its forecast."""
        return self.weather_query.go(data={
            "place": city.name,
            "forecast": "short"
        }).iter_forecast()

    def get_current(self, city):
        """Return the current observation for ``city``.

        The location query is used to obtain the station id, which is then
        used to load the observations page.
        """
        station_id = self.weather_query.go(data={
            "place": city.name,
            "forecast": "short"
        }).get_station_id()
        return self.observations.go(station_id=station_id).get_current()
class T411Browser(LoginBrowser):
    """Browser for t411 torrent search; requires a logged-in session."""

    PROFILE = Wget()
    TIMEOUT = 30

    BASEURL = 'https://www.t411.in/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    home = URL(r'$', HomePage)
    search = URL(r'torrents/search/\?search=(?P<pattern>.*)&order=seeders&type=desc',
                 SearchPage)
    torrent = URL(r'/torrents/details/\?id=(?P<id>.*)&r=1',
                  r'torrents/[^&]*',
                  TorrentPage)

    def do_login(self):
        """Log in from the home page.

        :raises BrowserIncorrectPassword: if the page still reports not
            being logged after submitting the credentials
        """
        self.home.go()
        if not self.page.logged:
            self.page.login(self.username, self.password)
        if not self.page.logged:
            raise BrowserIncorrectPassword()

    @need_login
    def iter_torrents(self, pattern):
        """Return the search page's ``iter_torrents()`` for ``pattern``."""
        return self.search.go(pattern=pattern).iter_torrents()

    @need_login
    def get_torrent(self, fullid, torrent=None):
        """Fetch the torrent identified by ``fullid``.

        :param torrent: unused; kept for interface compatibility
        :return: result of the page's ``get_torrent()``, or ``None`` when
                 the request raises ``BrowserHTTPNotFound``
        """
        try:
            self.torrent.go(id=fullid)
            torrent = self.page.get_torrent()
            return torrent
        except BrowserHTTPNotFound:
            return None
class BtmonBrowser(PagesBrowser):
    """Browser for btmon.com torrent search."""

    PROFILE = Wget()
    TIMEOUT = 30

    BASEURL = 'http://www.btmon.com/'
    home = URL('$', HomePage)
    search = URL(r'/torrent/\?sort=relevance&f=(?P<pattern>.*)', SearchPage)
    torrent = URL(r'/(?P<torrent_id>.*)\.torrent\.html', TorrentPage)

    def get_bpc_cookie(self):
        """Make sure the session carries a 'BPC' cookie.

        When it is missing, the home page is loaded and the value found
        between the last 'BPC=' and the following double quote of its
        content is stored as the cookie.
        """
        if 'BPC' in self.session.cookies:
            return
        self.home.go()
        after_marker = str(self.page.content).split('BPC=')[-1]
        self.session.cookies['BPC'] = after_marker.split('"')[0]

    def iter_torrents(self, pattern):
        """Return the search page's ``iter_torrents()`` for ``pattern``."""
        self.get_bpc_cookie()
        results_page = self.search.go(pattern=pattern)
        return results_page.iter_torrents()

    def get_torrent(self, id):
        """Return the torrent for ``id``, or ``None`` on ``BrowserHTTPNotFound``."""
        try:
            self.get_bpc_cookie()
            self.torrent.go(torrent_id=id)
            return self.page.get_torrent()
        except BrowserHTTPNotFound:
            return None
class LyricsplanetBrowser(PagesBrowser):
    """Browser for lyricsplanet.com lyrics search and retrieval."""

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.lyricsplanet.com/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    home = URL(r'$', HomePage)
    search = URL(r'search\.php$', SearchPage)
    artist = URL(r'search\.php\?field=artisttitle&value=(?P<artistid>[^/]*)$',
                 ArtistPage)
    lyrics = URL(r'lyrics\.php\?id=(?P<songid>[^/]*)$', LyricsPage)

    def iter_lyrics(self, criteria, pattern):
        """Run a search from the home page and return its results.

        :param criteria: ``'song'`` or ``'artist'``
        :param pattern: text to search for
        :return: an iterable of results; ``None`` for any other ``criteria``
        """
        self.home.stay_or_go()
        assert self.home.is_here()
        self.page.search_lyrics(criteria, pattern)
        assert self.search.is_here()
        if criteria == 'song':
            return self.page.iter_song_lyrics()
        elif criteria == 'artist':
            artist_ids = self.page.get_artist_ids()
            it = []
            # we just take the 3 first artists to avoid too many page loadings
            for aid in artist_ids[:3]:
                it = itertools.chain(
                    it, self.artist.go(artistid=aid).iter_lyrics())
            return it

    def get_lyrics(self, id):
        """Return the lyrics for song ``id``, or ``None`` on ``BrowserHTTPNotFound``."""
        try:
            self.lyrics.go(songid=id)
            songlyrics = self.page.get_lyrics()
            return songlyrics
        except BrowserHTTPNotFound:
            return None
class SGPEBrowser(LoginBrowser):
    """Common base for the Societe Generale browsers.

    The code relies on ``MENUID``, ``accounts`` and ``go_accounts`` which
    are not defined here; subclasses are expected to provide them.
    """

    login = URL('$', LoginPage)
    cards = URL('/Pgn/.+PageID=Cartes&.+', CardsPage)
    cards_history = URL('/Pgn/.+PageID=ReleveCarte&.+', CardHistoryPage)

    def is_logged(self):
        """Tell whether a page other than the login one is loaded without error."""
        if not self.page or self.login.is_here():
            return False
        return self.page.get_error() is None

    def do_login(self):
        """Submit the credentials and check that the session is logged.

        :raises BrowserIncorrectPassword: if the password is not made of
            digits only, or if the login did not succeed
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)

        if not self.password.isdigit():
            raise BrowserIncorrectPassword('Password must be 6 digits long.')

        self.login.stay_or_go()
        self.session.cookies.set('PILOTE_OOBA', 'true')
        self.page.login(self.username, self.password)

        # force page change
        if not self.accounts.is_here():
            self.go_accounts()

        if not self.is_logged():
            raise BrowserIncorrectPassword()

    def card_history(self, account, coming):
        """Yield the card transactions of ``coming``, page after page."""
        url_template = ('/Pgn/NavigationServlet?PageID=ReleveCarte&MenuID=%sOPF'
                        '&Classeur=1&Rib=%s&Carte=%s&Date=%s&PageDetail=%s&Devise=%s')
        page = 1
        while True:
            self.location(url_template % (self.MENUID, account.id, coming['carte'],
                                          coming['date'], page, account.currency))
            for transaction in self.page.iter_transactions(date=coming['date']):
                yield transaction
            if not self.page.has_next():
                break
            page += 1

    @need_login
    def get_cb_operations(self, account):
        """Yield the card transactions of every entry of the coming list."""
        cards_url = ('/Pgn/NavigationServlet?PageID=Cartes&MenuID=%sOPF'
                     '&Classeur=1&NumeroPage=1&Rib=%s&Devise=%s')
        self.location(cards_url % (self.MENUID, account.id, account.currency))

        for coming in self.page.get_coming_list():
            for tr in self.card_history(account, coming):
                yield tr

    def iter_investment(self, account):
        """Not supported by this browser."""
        raise NotImplementedError()
class IpinfodbBrowser(PagesBrowser):
    """Browser for the ipinfodb.com IP locator."""

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'https://ipinfodb.com/'
    home = URL('$', HomePage)
    search = URL('ip_locator.php', LocationPage)

    def get_location(self, ipaddr):
        """Look up ``ipaddr`` through the home page search.

        :return: result of the resulting page's ``get_location()``, or
                 ``None`` when ``BrowserHTTPNotFound`` is raised
        """
        try:
            self.home.go()
            self.page.search(ipaddr)
            return self.page.get_location()
        except BrowserHTTPNotFound:
            return None
class SGProfessionalBrowser(SGEnterpriseBrowser):
    """Societe Generale "professionnels" variant of the enterprise browser.

    Only the site-specific constants and the profile URL are overridden.
    """

    BASEURL = 'https://professionnels.secure.societegenerale.fr'
    LOGIN_FORM = 'auth_reco'
    MENUID = 'SBOREL'
    CERTHASH = '9f5232c9b2283814976608bfd5bba9d8030247f44c8493d8d205e574ea75148e'

    profile = URL('/gao/modifier-donnees-perso-saisie.html', ProfileProPage)
class BECMBrowser(AbstractBrowser):
    """BECM browser built on top of the 'creditmutuel' parent module."""

    PROFILE = Wget()
    TIMEOUT = 30
    BASEURL = 'https://www.becm.fr'
    PARENT = 'creditmutuel'

    login = URL('/fr/authentification.html', LoginPage)
    # Raw string so that the regex escape is not parsed as an (invalid)
    # string escape sequence.
    advisor = URL(r'/fr/banques/Details.aspx\?banque=.*', AdvisorPage)

    def do_login(self):
        """Log in on the BECM website.

        :raises ActionNeeded: if the login page is still displayed after
            submitting the credentials
        :raises AuthMethodNotImplemented: if the site lands on the
            ``verify_pass`` page
        """
        # Clear cookies.
        self.do_logout()

        self.login.go()

        if not self.page.logged:
            self.page.login(self.username, self.password)

        # Many "Credit Mutuel" customers tried to add their connection to BECM, but the BECM
        # website does not return any error when you try to login with correct Crédit Mutuel
        # credentials, therefore we must suggest them to try regular Crédit Mutuel if login fails.
        if self.login.is_here():
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which leaked stray characters into the
            # message shown to the user.
            raise ActionNeeded(
                "La connexion au site de BECM n'a pas fonctionné avec les identifiants fournis. "
                "Si vous êtes client du Crédit Mutuel, veuillez réessayer en sélectionnant le module Crédit Mutuel."
            )

        if self.verify_pass.is_here():
            raise AuthMethodNotImplemented(
                "L'identification renforcée avec la carte n'est pas supportée."
            )

    @need_login
    def get_advisor(self):
        """Return an iterator over the advisor, if one could be found.

        The lookup differs depending on ``self.is_new_website``.

        :return: iterator yielding one advisor, or an empty iterator
        """
        advisor = None
        if not self.is_new_website:
            self.accounts.stay_or_go(subbank=self.currentSubBank)
            if self.page.get_advisor_link():
                advisor = self.page.get_advisor()
                # Follow the link and let the target page complete the object.
                self.location(
                    self.page.get_advisor_link()).page.update_advisor(advisor)
        else:
            advisor = self.new_accounts.stay_or_go(
                subbank=self.currentSubBank).get_advisor()
            link = self.page.get_agency()
            if link:
                self.location(link)
                self.page.update_advisor(advisor)
        return iter([advisor]) if advisor else iter([])
class LimetorrentsBrowser(PagesBrowser):
    """Browser for limetorrents.info torrent search."""

    PROFILE = Wget()
    TIMEOUT = 30

    BASEURL = 'https://www.limetorrents.info/'
    search = URL(r'/search/all/(?P<pattern>.*)/seeds/(?P<page>[0-9]+)/',
                 SearchPage)
    torrent = URL(r'/(?P<torrent_name>.*)-torrent-(?P<torrent_id>[0-9]+)\.html',
                  TorrentPage)

    def iter_torrents(self, pattern):
        """Return ``iter_torrents()`` of page 1 of the search for ``pattern``."""
        first_page = self.search.go(pattern=pattern, page=1)
        return first_page.iter_torrents()

    def get_torrent(self, id):
        """Return the torrent for ``id``, or ``None`` on ``BrowserHTTPNotFound``.

        The name part of the URL is filled with a placeholder value.
        """
        try:
            self.torrent.go(torrent_id=id, torrent_name='whatever')
            return self.page.get_torrent()
        except BrowserHTTPNotFound:
            return None
class CICBrowser(AbstractBrowser):
    """CIC browser built on top of the 'creditmutuel' parent module.

    Only constants and URL patterns are declared here.
    """

    PROFILE = Wget()
    TIMEOUT = 30
    BASEURL = 'https://www.cic.fr'
    PARENT = 'creditmutuel'

    # Every location handled by LoginPage.
    login = URL(
        r'/fr/authentification.html',
        r'/sb/fr/banques/particuliers/index.html',
        r'/(?P<subbank>.*)/fr/$',
        r'/(?P<subbank>.*)/fr/banques/accueil.html',
        r'/(?P<subbank>.*)/fr/banques/particuliers/index.html',
        LoginPage,
    )
    por = URL(r'/(?P<subbank>.*)fr/banque/PORT_Synthese.aspx', PorPage)

    decoupled_state = URL(r'/fr/otp/SOSD_OTP_GetTransactionState.htm',
                          DecoupledStatePage)
    cancel_decoupled = URL(r'/fr/otp/SOSD_OTP_CancelTransaction.htm',
                           CancelDecoupled)
class IpinfodbBrowser(PagesBrowser):
    """Browser for the ipinfodb.com IP locator."""

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'https://ipinfodb.com/'
    home = URL('$', LocationPage)

    def get_location(self, ipaddr):
        """Send ``ipaddr`` as form data to the home URL and return the location."""
        form_data = {'ip': ipaddr}
        self.home.go(data=form_data)
        return self.page.get_location()
class CpasbienBrowser(PagesBrowser):
    """Browser for cpasbien torrent search."""

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.cpasbien.cm/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    search = URL(r'recherche/(?P<pattern>.*).html,trie-seeds-d', SearchPage)
    torrent = URL(r'dl-torrent/(?P<id>.*)\.html', TorrentPage)

    def iter_torrents(self, pattern):
        """Return the search page's ``iter_torrents()`` for ``pattern``."""
        self.search.go(pattern=pattern)
        return self.page.iter_torrents()

    def get_torrent(self, fullid):
        """Return the torrent for ``fullid``, or ``None`` on ``BrowserHTTPNotFound``."""
        try:
            self.torrent.go(id=fullid)
            torrent = self.page.get_torrent()
            return torrent
        except BrowserHTTPNotFound:
            return None
class NewspaperLibeBrowser(AbstractBrowser):
    """Liberation browser built on top of the 'genericnewspaper' module."""

    PARENT = 'genericnewspaper'
    BASEURL = ''

    article = URL('http://.*liberation.fr/.*', ArticlePage)

    def __init__(self, weboob, *args, **kwargs):
        """Keep a reference to ``weboob`` and initialise the parent browser.

        :param weboob: stored as ``self.weboob``; not forwarded to the
                       parent constructor
        """
        self.weboob = weboob
        # The class is named explicitly: super(self.__class__, self) would
        # recurse forever as soon as this class gets subclassed.
        super(NewspaperLibeBrowser, self).__init__(*args, **kwargs)
class NewspaperTazBrowser(AbstractBrowser):
    """Taz browser built on top of the 'genericnewspaper' module."""

    PARENT = 'genericnewspaper'
    BASEURL = 'http://www.taz.de'

    article_page = URL('/.*', ArticlePage)

    def __init__(self, *args, **kwargs):
        """Store the mandatory ``weboob`` keyword argument, then delegate.

        ``weboob`` is left in ``kwargs`` and forwarded to the parent too.
        """
        weboob = kwargs['weboob']
        self.weboob = weboob
        super(NewspaperTazBrowser, self).__init__(*args, **kwargs)
class T411Browser(LoginBrowser):
    """Browser for t411 torrent search; requires a logged-in session."""

    PROFILE = Wget()
    TIMEOUT = 30

    BASEURL = 'https://www.t411.si/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    home = URL(r'$', HomePage)
    login = URL(r'/login$', LoginPage)
    search = URL(r'/torrents/search/\?search=(?P<pattern>.*)', SearchPage)
    download = URL(r'/telecharger-torrent/(?P<torrent_hash>[0-9a-f]{40})/(?P<torrent_name>\w+)',
                   DownloadPage)
    torrent = URL(r'/torrents/(?P<torrent_id>[0-9]+)/(?P<torrent_name>.*)',
                  TorrentPage)

    def do_login(self):
        """Log in from the home page, then reload it to check the result.

        :raises BrowserIncorrectPassword: if the reloaded home page still
            reports not being logged
        """
        self.home.go()
        if not self.page.logged:
            self.page.login(self.username, self.password)
            self.home.go()

        if not self.page.logged:
            raise BrowserIncorrectPassword()

    @need_login
    def iter_torrents(self, pattern):
        """Return the search page's ``iter_torrents()`` for ``pattern``."""
        return self.search.go(pattern=pattern).iter_torrents()

    @need_login
    def get_torrent(self, torrent):
        """Reload ``torrent`` from its page, using its ``id`` and ``name``.

        :return: result of the page's ``get_torrent()``, or ``None`` when
                 the request raises ``BrowserHTTPNotFound``
        """
        try:
            self.torrent.go(torrent_id=torrent.id, torrent_name=torrent.name)
            torrent = self.page.get_torrent()
            return torrent
        except BrowserHTTPNotFound:
            return None

    def get_torrent_file(self, torrent):
        """Return the raw content served at the torrent's ``url``.

        :return: response content, or ``None`` if the torrent is not found
        """
        # This object is the browser itself, so its own methods are called
        # directly; nothing in this class defines a ``browser`` attribute.
        torrent = self.get_torrent(torrent)
        if not torrent:
            return None

        resp = self.open(torrent.url)
        return resp.content
class NewspaperInrocksBrowser(AbstractBrowser):
    """Les Inrocks browser built on top of the 'genericnewspaper' module."""

    PARENT = 'genericnewspaper'
    BASEURL = 'http://www.lesinrocks.com'

    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    article = URL(r'/\?p=.+',
                  r'/\d{4}/\d{2}/\d{2}/actualite/.*',
                  r'http://blogs.lesinrocks.com/.*',
                  r'/.*',
                  ArticlePage)

    def __init__(self, *args, **kwargs):
        """Store the mandatory ``weboob`` keyword argument, then delegate.

        ``weboob`` is left in ``kwargs`` and forwarded to the parent too.
        """
        self.weboob = kwargs['weboob']
        super(NewspaperInrocksBrowser, self).__init__(*args, **kwargs)
class CICBrowser(AbstractBrowser):
    """CIC browser built on top of the 'creditmutuel' parent module.

    Only constants and the login URL patterns are declared here.
    """

    PROFILE = Wget()
    TIMEOUT = 30
    BASEURL = 'https://www.cic.fr'
    PARENT = 'creditmutuel'

    # Every location handled by LoginPage.
    login = URL(
        '/sb/fr/banques/particuliers/index.html',
        '/(?P<subbank>.*)/fr/$',
        '/(?P<subbank>.*)/fr/banques/accueil.html',
        '/(?P<subbank>.*)/fr/banques/particuliers/index.html',
        LoginPage,
    )
class AmericanExpressLoginBrowser(SeleniumBrowser):
    """Selenium-driven browser handling the American Express login."""

    BASEURL = 'https://global.americanexpress.com'

    DRIVER = webdriver.Chrome

    # True for Production / False for debug
    HEADLESS = True

    login = URL(r'/login', LoginPage)
    login_error = URL(r'/login',
                      r'/authentication/recovery/password',
                      LoginErrorPage)
    dashboard = URL(r'/dashboard', DashboardPage)

    def __init__(self, config, *args, **kwargs):
        """Read the credentials from ``config['login']`` and ``config['password']``."""
        super(AmericanExpressLoginBrowser, self).__init__(*args, **kwargs)
        self.username = config['login'].get()
        self.password = config['password'].get()

    def do_login(self):
        """Submit the credentials and wait for the dashboard or an error page.

        :raises BrowserIncorrectPassword: for wrong or missing credentials
        :raises ActionNeeded: when the account is reported as locked
        :raises AssertionError: for any other error message
        """
        self.login.go()
        self.wait_until_is_here(self.login)

        self.page.login(self.username, self.password)

        self.wait_until(AnyCondition(
            IsHereCondition(self.login_error),
            IsHereCondition(self.dashboard),
        ))

        if self.login_error.is_here():
            error = self.page.get_error()
            if any((
                'The User ID or Password is incorrect' in error,
                'Both the User ID and Password are required' in error,
            )):
                raise BrowserIncorrectPassword(error)
            if 'Your account has been locked' in error:
                raise ActionNeeded(error)
            # Raised explicitly: an ``assert False`` statement is removed when
            # Python runs with -O, which would let the error go unnoticed.
            raise AssertionError('Unhandled error : "%s"' % error)
class ParolesmusiqueBrowser(PagesBrowser):
    """Browser for paroles-musique.com lyrics search and retrieval."""

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.paroles-musique.com/'
    home = URL('$', HomePage)
    songResults = URL('lyrics-paroles-0-.*,0.php', SongResultsPage)
    artistResults = URL('lyrics-paroles-.*-0,0.php', ArtistResultsPage)
    songLyrics = URL('paroles-(?P<songid>.*,p[0-9]*)', SonglyricsPage)
    artistSongs = URL('paroles-(?P<artistid>.*,a[0-9]*)', ArtistSongsPage)

    def iter_lyrics(self, criteria, pattern):
        """Run a search from the home page and return its results.

        ``criteria`` is ``'song'`` or ``'artist'``; any other value makes
        the method return ``None`` after the search has been submitted.
        """
        self.home.stay_or_go()
        assert self.home.is_here()
        self.page.search_lyrics(criteria, pattern)

        if criteria == 'song':
            assert self.songResults.is_here()
            return self.page.iter_lyrics()

        if criteria == 'artist':
            assert self.artistResults.is_here()
            # Only the first 3 artists are visited, to limit page loads.
            first_artists = self.page.get_artist_ids()[:3]
            merged = []
            for artist_id in first_artists:
                songs_page = self.artistSongs.go(artistid=artist_id)
                merged = itertools.chain(merged, songs_page.iter_lyrics())
            return merged

    def get_lyrics(self, id):
        """Return the lyrics for song ``id``, or ``None`` on ``BrowserHTTPNotFound``."""
        try:
            self.songLyrics.go(songid=id)
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
class KickassBrowser(PagesBrowser):
    """Browser for kat.cr torrent search."""

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'https://kat.cr/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    search = URL(r'usearch/(?P<pattern>.*)/\?field=seeders&sorder=desc',
                 SearchPage)
    torrent = URL(r'torrent-t(?P<id>.*).html',
                  r'.*-t[0-9]*\.html',
                  TorrentPage)

    def iter_torrents(self, pattern):
        """Return the search page's ``iter_torrents()`` for ``pattern``."""
        self.search.go(pattern=pattern)
        return self.page.iter_torrents()

    def get_torrent(self, fullid):
        """Return the torrent for ``fullid``, or ``None`` on ``BrowserHTTPNotFound``."""
        try:
            self.torrent.go(id=fullid)
            torrent = self.page.get_torrent()
            return torrent
        except BrowserHTTPNotFound:
            return None
class YggtorrentBrowser(LoginBrowser):
    """Browser for yggtorrent search; requires a logged-in session."""

    PROFILE = Wget()
    TIMEOUT = 30

    BASEURL = 'https://yggtorrent.to/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    home = URL(r'$', HomePage)
    login = URL(r'/user/login$', LoginPage)
    search = URL(r'/engine/search\?name=(?P<pattern>.*)&order=desc&sort=seed&do=search',
                 SearchPage)
    download = URL(r'/engine/download_torrent\?id=(?P<torrent_id>[0-9]+)',
                   DownloadPage)
    torrent = URL(r'/torrent/(?P<torrent_cat>.+)/(?P<torrent_subcat>.+)/(?P<torrent_id>[0-9]+)-(?P<torrent_name>.*)',
                  TorrentPage)

    def do_login(self):
        """Log in from the home page, then reload it to check the result.

        :raises BrowserIncorrectPassword: if the reloaded home page still
            reports not being logged
        """
        self.home.go()
        if not self.page.logged:
            self.page.login(self.username, self.password)
            self.home.go()

        if not self.page.logged:
            raise BrowserIncorrectPassword()

    @need_login
    def iter_torrents(self, pattern):
        """Return the search page's ``iter_torrents()`` for ``pattern``."""
        return self.search.go(pattern=pattern).iter_torrents()

    @need_login
    def get_torrent(self, id):
        """Return the torrent for ``id``, or ``None`` on ``BrowserHTTPNotFound``.

        Name, category and sub-category are filled with placeholder values.
        """
        try:
            self.torrent.go(torrent_id=id,
                            torrent_name='anything',
                            torrent_cat='any',
                            torrent_subcat='thing')
            torrent = self.page.get_torrent()
            return torrent
        except BrowserHTTPNotFound:
            return None

    @need_login
    def get_torrent_file(self, id):
        """Return the raw content served at the torrent's ``url``.

        :return: response content, or ``None`` if the torrent is not found
        """
        # This object is the browser itself, so its own methods are called
        # directly; nothing in this class defines a ``browser`` attribute.
        torrent = self.get_torrent(id)
        if not torrent:
            return None

        resp = self.open(torrent.url)
        return resp.content
class PastealaconBrowser(PagesBrowser):
    """Browser for the paste.alacon.org pastebin."""

    BASEURL = 'http://paste.alacon.org/'

    paste = URL(r'(?P<id>\d+)', PastePage)
    captcha = URL(r'%s' % re.escape('pastebin.php?captcha=1'), CaptchaPage)
    raw = URL(r'%s(?P<id>\d+)' % re.escape('pastebin.php?dl='))
    post = URL(r'$', PostPage)

    @paste.id2url
    def get_paste(self, url):
        """Build a PastealaconPaste from ``url`` if it matches the paste pattern.

        Returns ``None`` when the URL does not match.
        """
        match = self.paste.match(self.absurl(url, base=True))
        if not match:
            return None
        return PastealaconPaste(match.groupdict()['id'])

    def fill_paste(self, paste):
        """
        Get as much information as possible from the paste page
        """
        self.paste.stay_or_go(id=paste.id)
        filled = self.page.fill_paste(paste)
        return filled

    def get_contents(self, _id):
        """
        Get the contents from the raw URL
        This is the fastest and safest method if you only want the content.
        Returns unicode.

        Raises PasteNotFound when the request raises BrowserHTTPNotFound.
        """
        try:
            raw_bytes = self.raw.open(id=_id).content
            # despite what the HTTP header says, it is iso8859
            return raw_bytes.decode('iso8859-15')
        except BrowserHTTPNotFound:
            raise PasteNotFound()

    def post_paste(self, paste, expiration=None):
        """Submit ``paste``, then fill it from the resulting page.

        Raises Spam when the captcha page is displayed after posting.
        """
        post_page = self.post.stay_or_go()
        post_page.post(paste, expiration=expiration)
        if self.captcha.is_here():
            raise Spam()
        self.page.fill_paste(paste)
class LyricsmodeBrowser(PagesBrowser):
    """Browser for lyricsmode.com lyrics search and retrieval.

    Lyrics ids accepted by :meth:`get_lyrics` have the form
    ``<letterid>|<artistid>|<songid>``.
    """

    PROFILE = Firefox()
    TIMEOUT = 30

    BASEURL = 'http://www.lyricsmode.com/'
    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    search = URL(r'search\.php\?search=(?P<pattern>[^&/]*)$', SearchPage)
    songLyrics = URL(r'lyrics/(?P<letterid>[^/]*)/(?P<artistid>[^/]*)/(?P<songid>[^/]*)\.html$',
                     LyricsPage)

    def iter_lyrics(self, criteria, pattern):
        """Return the search page's ``iter_lyrics()`` for ``pattern``.

        :param criteria: accepted for interface compatibility; not used
        """
        return self.search.go(pattern=pattern).iter_lyrics()

    def get_lyrics(self, id):
        """Return the lyrics for ``id``, or ``None`` on ``BrowserHTTPNotFound``."""
        subid = id.split('|')
        try:
            self.songLyrics.go(letterid=subid[0], artistid=subid[1], songid=subid[2])
            songlyrics = self.page.get_lyrics()
            return songlyrics
        except BrowserHTTPNotFound:
            return None
class NewspaperFigaroBrowser(AbstractBrowser):
    """Le Figaro browser built on top of the 'genericnewspaper' module."""

    PARENT = 'genericnewspaper'
    BASEURL = 'http://lefigaro.fr'

    # Patterns are raw strings so that regex escapes are not parsed as
    # (invalid) string escape sequences.
    article_page = URL(r'http://lefigaro.fr/(.*)/(\d{4})/(\d{2})/(\d{2})/(.*$)',
                       r'http://\w+.lefigaro.fr/(.*)/(\d{4})/(\d{2})/(\d{2})/(.*$)',
                       r'http://\w+.lefigaro.fr/(.*)',
                       ArticlePage)

    def __init__(self, weboob, *args, **kwargs):
        """Keep a reference to ``weboob`` and initialise the parent browser.

        :param weboob: stored as ``self.weboob``; not forwarded to the
                       parent constructor
        """
        self.weboob = weboob
        # The class is named explicitly: super(self.__class__, self) would
        # recurse forever as soon as this class gets subclassed.
        super(NewspaperFigaroBrowser, self).__init__(*args, **kwargs)
class SprungeBrowser(PagesBrowser):
    """Browser for the sprunge.us pastebin."""

    BASEURL = 'http://sprunge.us/'

    paste = URL(r'(?P<id>\w+)', PastePage)
    post = URL(r'$')

    @paste.id2url
    def get_paste(self, url):
        """Build a SprungePaste from ``url`` if it matches the paste pattern.

        Returns ``None`` when the URL does not match.
        """
        match = self.paste.match(self.absurl(url, base=True))
        if not match:
            return None
        return SprungePaste(match.groupdict()['id'])

    def fill_paste(self, paste):
        """
        Get as much information as possible from the paste page
        """
        paste_page = self.paste.stay_or_go(id=paste.id)
        return paste_page.fill_paste(paste)

    def post_paste(self, paste):
        """Post the paste contents, open the URL sent back and fill the paste."""
        response = self.post.open(data={'sprunge': paste.contents})
        new_url = response.text.strip()
        self.location(new_url)
        return self.page.fill_paste(paste)