def main():
    """Download Dilbert strips into DIRNAME, resuming after the newest file.

    The newest file already in DIRNAME (by sorted name) decides where to
    start; with an empty directory the walk starts at 1989-04-16. That
    first strip is fetched again, so a partially written last file is
    replaced. The loop then follows each page's "next" link until a page
    has none, at which point the page HTML is printed for inspection.

    Raises:
        ValueError: if a page contains no strip image URL.
    """
    browser = Browser()
    try:
        makedirs(DIRNAME)
    except OSError:
        # Best effort: the directory normally exists on every run but the
        # first. A real failure surfaces from listdir() just below.
        pass

    files = sorted(listdir(DIRNAME))
    # Files are named after the URL's date segment, so the last name in
    # sorted order is taken as the most recent strip.
    lastfile = files[-1] if files else "1989-04-16.gif"
    lastday = lastfile.split(".")[0]

    next_url = BASE_URL + lastday + "/"
    while next_url:
        html = browser.get_html(next_url)

        img_match = re.search(
            r'''http://dilbert.com/dyn/str_strip.*?\.gif''', html)
        if img_match is None:
            # Fail with a readable message instead of an AttributeError
            # on None.
            raise ValueError("no strip image found at %s" % next_url)
        img_url = img_match.group(0)

        filename = path.join(DIRNAME, next_url.split("/")[-2] + ".gif")
        print(filename)
        print("img: %s" % img_url)
        # Binary mode: the payload is GIF data, and text mode would
        # rewrite newline bytes on some platforms.
        with open(filename, "wb") as out:
            out.write(browser.get_html(img_url))

        # NOTE(review): the capture group includes the closing quote of
        # the href; left as-is because the filename logic above still
        # picks the date segment. Confirm the browser tolerates it.
        next_match = re.search(
            r'''href="(/strips/comic/.*?/") class="STR_Next PNG_Fix"''', html)
        if next_match:
            next_url = next_match.group(1)
        else:
            # No "next" link: dump the page for debugging and stop.
            print(html)
            next_url = None
def main():
    """Download Sinfest comics into the current directory.

    Starts at archive page comicID=4323 and follows each page's "next"
    link until a page has none. Each image is saved under the last path
    segment of its URL.

    Raises:
        ValueError: if a page contains no comic image URL.
    """
    browser = Browser()
    next_url = "http://sinfest.net/archive_page.php?comicID=4323"
    while next_url:
        page_url = next_url
        html = browser.get_html(page_url)

        img_match = re.search(
            r'''http://sinfest.net/comikaze/comics/[\d-]*.gif''', html)
        if img_match is None:
            # Fail with a readable message instead of an AttributeError
            # on None.
            raise ValueError("no comic image found at %s" % page_url)
        img_url = img_match.group(0)

        next_match = re.search(
            r'''"(http://sinfest.net/archive_page.php\?comicID=\d*)"><img src="images/next_a.gif"''',
            html)
        # None ends the loop once the current image has been saved.
        next_url = next_match.group(1) if next_match else None

        filename = img_url.split("/")[-1]
        print(filename)
        # Binary mode: the payload is GIF data, and text mode would
        # rewrite newline bytes on some platforms.
        with open(filename, "wb") as out:
            out.write(browser.get_html(img_url))
        print("img: %s" % img_url)
        print("next: %s" % next_url)
def get_token(username, password=None, appid="616893704999769"):
    """Log in to Facebook and return an OAuth user token for ``appid``.

    Submits the login form, opens the OAuth authorize URL, and then looks
    through the browser history (newest first) for a URL containing
    "token". The token is the text between the first "=" of that URL and
    the following "&".

    Args:
        username: account e-mail; also passed to ``get_credentials`` when
            no password is supplied.
        password: account password. When falsy, both username and
            password are taken from ``get_credentials(username)``.
        appid: Facebook application id placed in the authorize URL.

    Returns:
        The extracted token string.

    Raises:
        AssertionError: if ``troubleshoting(browser)`` returns a falsy
            value after login or after the authorize request.
        ValueError: if the final page does not contain "Success", or if
            no URL in the browser history contains "token".
    """
    url = (
        """https://graph.facebook.com/oauth/authorize?client_id=""" + appid +
        """&redirect_uri=http://www.facebook.com/connect/"""
        """login_success.html&type=user_agent&display=popup&scope="""
        "user_about_me,friends_about_me,user_activities,friends_activities,"
        "user_birthday,friends_birthday,user_checkins,friends_checkins,"
        "user_education_history,friends_education_history,user_events,"
        "friends_events,user_groups,friends_groups,user_hometown,friends_hometown,"
        "user_interests,friends_interests,user_likes,friends_likes,user_location,"
        "friends_location,user_notes,friends_notes,user_photos,friends_photos,"
        "user_questions,friends_questions,user_relationships,"
        "friends_relationships,user_relationship_details,"
        "friends_relationship_details,user_religion_politics,"
        "friends_religion_politics,user_status,friends_status,user_subscriptions,"
        "friends_subscriptions,user_videos,friends_videos,user_website,"
        "friends_website,user_work_history,friends_work_history,read_friendlists,"
        "read_insights,read_mailbox,read_requests,read_stream,xmpp_login,"
        "user_online_presence,friends_online_presence,ads_management,create_event,"
        "manage_friendlists,manage_notifications,publish_actions,publish_stream,"
        "rsvp_event,publish_actions,user_actions.music,friends_actions.music,"
        "user_actions.news,friends_actions.news,user_actions.video,"
        "friends_actions.video,user_games_activity,friends_games_activity,"
        "manage_pages,email"
    )  # give me the power: a very broad set of permission scopes

    if not password:
        username, password = get_credentials(username)

    browser = Browser()
    form = browser.get_forms("http://fb.com")[0]
    form["email"] = username
    form["pass"] = password
    form.submit()

    # Explicit checks rather than ``assert``: under ``python -O`` an
    # assert statement is removed, and the helper call with it.
    if not troubleshoting(browser):
        raise AssertionError("troubleshoting failed after login")
    browser.get_html(url)
    if not troubleshoting(browser):
        raise AssertionError("troubleshoting failed after authorize request")

    html = browser.get_html()
    if "Success" not in html:
        browser.show()
        raise ValueError("Not success")

    # next() with a default: a bare next() on an exhausted generator
    # raises StopIteration, which would otherwise leak to the caller.
    token_url = next(
        (visited for visited in reversed(browser.hist) if "token" in visited),
        None)
    if token_url is None:
        browser.show()
        raise ValueError("Success page reached but no token URL in history")

    token = token_url.split("=")[1]
    return token.split("&")[0]
#!/usr/bin/env python
#-*- coding: UTF-8 -*-
# Send every input line (files named on the command line, or stdin) to the
# Google Translate endpoint in BASE_URL and print the raw response.
import fileinput
import urllib

from litebrowser import Browser

# %s receives the URL-quoted text to translate.
BASE_URL = ("http://translate.google.com/translate_a/t?"
            "client=t&sl=auto&tl=es&hl=es-419&sc=2&ie=UTF-8&oe=UTF-8&"
            "uptl=es&alttl=en&oc=1&otf=2&ssel=0&tsel=0&q=%s")

b = Browser()
# Visit the front page before querying the endpoint.
b.go("http://translate.google.com")
# ``fileinput.input`` is called through its module so the builtin
# ``input`` is not shadowed.
for line in fileinput.input():
    # Drop the line terminator so it is not sent as %0A in the query.
    text = urllib.quote(line.rstrip("\r\n"))
    b.go(BASE_URL % text)
    print(b.get_html())
def __init__(self, account):
    """Keep ``account`` on the instance and create its Browser.

    account: stored unchanged as ``self.account``.
    """
    self.account = account
    # A new Browser is built for every instance.
    session = Browser()
    self.browser = session
class Claro:
    """Scraper for the Claro self-service site (individuos.claro.com.ar).

    All requests go through one Browser instance held in ``self.browser``.
    Page paths passed to it are relative ("web/guest/..."); presumably the
    browser resolves them against the site visited in ``login`` -- confirm
    against the Browser implementation.
    """

    def __init__(self, account):
        """Store the credentials and create the browser.

        account: indexable pair; ``login`` submits ``account[0]`` as the
        login and ``account[1]`` as the password.
        """
        self.account = account
        self.browser = Browser()

    def login(self):
        """Fill in and submit the second form of the site's front page."""
        form = self.browser.get_forms("https://individuos.claro.com.ar")[1]
        form["login"] = self.account[0]
        form["password"] = self.account[1]
        form.submit()

    def get_saldos(self):
        """Scrape the balances page.

        Returns a dict mapping a balance title to the named groups of the
        pattern that matched it (all values are strings as found in the
        page). Titles whose pattern does not match are left out.
        """
        saldos = {}
        html = self.browser.get_html("web/guest/saldos-y-consumos1")
        # Verbose, dot-all patterns. The (?xs) flag group must be the very
        # first thing in each pattern: global inline flags anywhere else
        # are deprecated since Python 3.6 and rejected from 3.11.
        regexs = {
            "Abono fijo": r'''(?xs)Saldo\ del\ Abono:.*?\$ (?P<saldo> (?:\d+,)?\d+ ).*?Llevás\ consumidos\ \$ (?P<consumido> (?:\d+,)?\d+ ).*? (?P<total> \d*,\d* ).*?Per.+?odo\ actual.*? (?P<vencimiento> \d+/\d+/\d+ ) ''',
            "Paquete de datos": r'''(?xs)Saldo\ de\ Paquetes.*?Te\ quedan\ (?P<saldo> \d+? )\ MB\ de\ tu\ paquete\ de\ (?P<total> \d+ )\ MB\..*?Vence\ el\ (?P<vencimiento> \d+/\d+/\d+ ) ''',
            "Paquete de sms": r'''(?xs).*?Te\ quedan\ (?P<saldo> \d+ )\ SMS\ de\ tu\ paquete\ de\ (?P<total> \d+ )\ SMS\..*?Vence\ el\ + (?P<vencimiento> \d+/\d+/\d+ ) ''',
            "SMS Promocionales": r'''(?xs)SMS\ Promocionales.*?Tenés\ .*? (?P<saldo> \d+ ).*?SMS\ disponibles\ hasta\ el\ (?P<vencimiento> \d+/\d+/\d+ ) ''',
            "Crédito prepago congelado": r'''(?xs)Crédito\ de\ Recarga.*?Crédito\ Congelado.*? Saldo.*?Vencimiento.*?\$ (?P<saldo> \d+,\d+ ).*? (?P<vencimiento> \d+/\d+/\d+ ).*?Última\ recarga.*? (?P<ultima_recarga> \d+/\d+/\d+ ) ''',
            "Crédito prepago vigente": r'''(?xs)Crédito\ de\ Recarga.*?Recarga.*?\$ (?P<saldo> \d+,\d+ ).*? (?P<vencimiento> \d+/\d+/\d+ ) ''',
            "Crédito prepago promocional": r'''(?xs)Crédito\ de\ Recarga.*?Promocional.*?\$ (?P<saldo> \d+,\d+ ).*? (?P<vencimiento> \d+/\d+/\d+ ) ''',
            "Crédito de recarga": r'''(?xs)Crédito\ de\ Recarga.*?Última\ recarga.*? (?P<ultima_recarga> \d+/\d+/\d+ ) ''',
        }
        for title, regex in regexs.items():
            match = re.search(regex, html)
            if match:
                saldos[title] = match.groupdict()
        return saldos

    def get_circulo_opciones(self):
        """List the SMS options offered on the Claro Club page.

        Opens the page, submits its first form, and scans every row of
        the tables with class "tabla tablaGris". A row is considered only
        if its markup contains both "goSubmit(" and "SMS".

        Returns a list of dicts with the keys ``descripcion``,
        ``cantidad``, ``puntos`` and ``codigo`` (all strings). Rows that
        pass the filter but do not fit the expected markup are skipped.
        """
        self.browser.go("web/guest/consultar-claro-club")
        self.browser.get_forms()[0].submit()
        soup = BeautifulSoup(self.browser.get_html())
        regex = re.compile(
            r"""(?xs) <tr>\ <td.*?> (?P<descripcion>(?P<cantidad>\d+).*?) </td>\ <td.*?> (?P<puntos>\d+) .*?</td>.*?goSubmit\(' (?P<codigo>\d+) '\)"""
        )
        opciones = []
        for table in soup("table", {"class": "tabla tablaGris"}):
            for row in table("tr"):
                rawtext = str(row)
                if "goSubmit(" not in rawtext or "SMS" not in rawtext:
                    continue
                match = regex.match(rawtext)
                # The pattern is anchored at the start of the row markup;
                # a row with a different layout yields None, so skip it
                # rather than fail on None.groupdict().
                if match:
                    opciones.append(match.groupdict())
        return opciones