def lyrics():
    """Return the lyrics of the song currently playing on 'bide et musique'.

    Fetches the home page to get the current artist/title, follows the
    song's own link and scrapes its lyrics table.  Returns plain text;
    when the song page has no lyrics, returns a French
    "no lyrics available" message instead.
    """
    page = urllib.urlopen(HOME_PAGE)
    content = page.read()
    page.close()
    soup = BeautifulSoup(content)
    # Current title/artist live in dedicated <p> elements on the home page.
    souptitle = soup.findAll("p", {"class": "titre-song"})[0]
    title = souptitle.text
    artist = soup.findAll("p", {"class": "titre-song2"})[0].text
    # The title element links to the song's page, which holds the lyrics.
    url = "%s%s" % ("http://www.bide-et-musique.com", souptitle.a.get("href"))
    page = urllib.urlopen(url)
    content = page.read()
    page.close()
    soup = BeautifulSoup(content)
    tab = soup.findAll("td", {"class": "paroles"})
    if not tab:
        res = "Pas de paroles disponibles pour %s de %s" % (artist, title)
    else:
        res = "%s - %s\n%s\n" % (artist, title, "*" * 30)
        # Keep every child node of the lyrics cell except bare <br /> separators.
        for elt in tab[0].contents:
            if str(elt).lstrip() != "<br />":
                res += xhtml2text(unicode(elt).lstrip()) + "\n"
    return xhtml2text(res)
def extract(divclasse,LOCAL = False):
    """Build a {channel: evening-programme} mapping from a TV-grid page.

    divclasse is a (css_class, location) pair.  The location is opened as
    a local file when LOCAL is true, otherwise fetched as a URL.
    """
    if LOCAL:
        handle = open(divclasse[1])
    else:
        handle = urllib.urlopen(divclasse[1])
    markup = handle.read()
    handle.close()
    soup = BeautifulSoup(markup)
    result = {}
    for block in soup.findAll("div", {"class": divclasse[0]}):
        for item in block.findAll("li"):
            label = item.first("span", {"class":"txtLogoChaine"})
            try:
                label = label.getText().partition("Programme ")[2]
            except AttributeError:
                # Entry without a channel logo: nothing to key on.
                continue
            shows = ""
            for para in item.findAll("p"):
                anchors = para.findAll("a")
                try:
                    show_title = anchors[0].get("title")
                except IndexError:
                    # Paragraph without a programme link.
                    continue
                spans = para.findAll("span")
                shows += "%s : %s "%(spans[0].getText(), show_title)
            result[xhtml2text(label).lower()] = xhtml2text(shows)
    return result
def extract(divclasse, local=False):
    """Parse a TV-grid page into a dict mapping channel name to the
    evening programme string shown for it.

    divclasse: (css_class, location) tuple; location is a file path when
    local is true, a URL otherwise.
    """
    source = open(divclasse[1]) if local else urllib.urlopen(divclasse[1])
    markup = source.read()
    source.close()
    soup = BeautifulSoup(markup)
    programmes = {}
    for grid_div in soup.findAll("div", {"class": divclasse[0]}):
        for entry in grid_div.findAll("li"):
            logo = entry.first("span", {"class": "txtLogoChaine"})
            try:
                channel = logo.getText().partition("Programme ")[2]
            except AttributeError:
                # No channel logo in this entry: skip it.
                continue
            schedule = ""
            for paragraph in entry.findAll("p"):
                links = paragraph.findAll("a")
                try:
                    show = links[0].get("title")
                except IndexError:
                    # Paragraph carries no programme link.
                    continue
                when = paragraph.findAll("span")[0].getText()
                schedule += "%s : %s " % (when, show)
            programmes[xhtml2text(channel).lower()] = xhtml2text(schedule)
    return programmes
def current():
    """Return the track currently playing on bide et musique.

    Reads only the first 1500 bytes of the playlist page — enough to
    reach the second <title> element, whose text after ": " names the
    current track.  Returns an error string on parse failure.
    """
    page = urllib.urlopen(PLAYLIST)
    content = page.read(1500)
    page.close()
    soup = BeautifulSoup(content)
    try:
        return xhtml2text(xhtml2text(soup.findAll("title")[1].text.partition(": ")[2]))
    # `except Exception` (not a bare except) keeps the best-effort
    # behavior without swallowing SystemExit/KeyboardInterrupt.
    except Exception:
        return "HTML parsing failed !"
def extract_data(self, html_content):
    """ Extracts a bashorg quote given the HTML code of the page

    Returns "bashorg <number> :\\n<quote text>", or a not-found message
    when the page contains no quote.
    """
    soup = BeautifulSoup(html_content)
    if not soup.findAll("p", {"class": "qt"}):
        return "The quote does not exist !"
    # The quote number is in a <b> of the same <table> as the quote text,
    # so locate the table that actually contains a quote paragraph.
    for table in soup.findAll("table"):
        quotes = table.findAll("p", {"class": "qt"})
        if quotes:
            content = xhtml2text(unicode(quotes[0]))
            nb = xhtml2text(unicode(table.findAll("b")[0].text))
            return "bashorg %s :\n%s" % (nb, content)
    # Original code hit a NameError here (quote outside any table);
    # report it as missing instead.
    return "The quote does not exist !"
def answer(self, sender, message):
    """Look *message* up on the French wiktionary search API and return
    the first result's snippet, or NOT_FOUND.

    sender is unused here but part of the bot's answer() interface.
    """
    url = "http://fr.wiktionary.org/w/api.php?action=query&list=search&format=json&srsearch=%s&srlimit=10"
    # Quote the query: spaces/accented characters would otherwise
    # produce an invalid URL.
    page = urllib.urlopen(url % urllib.quote_plus(message))
    content = page.read()
    page.close()
    js = json.loads(content)
    try:
        snippet = xhtml2text(js["query"]["search"][0]["snippet"])
        # Removing pronunciation (replaces a space-like character —
        # presumably a no-break space; TODO confirm)
        clean = snippet.replace(" ", " ")
        return clean if clean != "" else NOT_FOUND
    except (KeyError, IndexError):
        return NOT_FOUND
def html_request(msg):
    """Query DuckDuckGo's HTML endpoint for *msg* and return up to
    MAX_RESULT results as "title - url" lines (newline separated).
    """
    # Quote the query so spaces and special characters survive the URL.
    site = urllib.urlopen('http://duckduckgo.com/html/?q=%s' % urllib.quote_plus(msg))
    data = site.read()
    site.close()
    soup = BeautifulSoup.BeautifulSoup(data)
    links = soup.findAll('div', {'class': "links_main links_deep"})
    results = ""
    for link in links[:MAX_RESULT]:
        anchor = link.find("a")
        url = anchor.get("href")
        title = ""
        # Result titles mix plain text with inline tags (e.g. <b>);
        # flatten both.  (Loop variable renamed: the original reused
        # `data` and shadowed the page payload.)
        for part in anchor.contents:
            if isinstance(part, BeautifulSoup.Tag):
                title += " %s" % part.getString()
            else:
                title += " %s" % str(xhtml2text(part))
        results += "%s - %s\n" % (title.strip(), url)
    return results.strip()
def parse_one_track(soup_track):
    """Render a b&m track row (two "baseitem" cells) as "artist - title"
    plain text."""
    artist_cell, title_cell = soup_track.findAll("td", {"class": "baseitem"})
    return xhtml2text("%s - %s" % (artist_cell.text, title_cell.text))
def parse_one_show(soup_show):
    """Format one bide et musique programme row as "hour - name"."""
    cells = soup_show.findAll("td")
    when = xhtml2text(cells[0].text)
    what = xhtml2text(cells[1].text)
    return "%s - %s" % (when, what)
def extract_data(self, html_content):
    """Extract the quote text from the first div.citation of the page,
    one line per paragraph."""
    soup = BeautifulSoup(html_content)
    citation = soup.findAll('div', {"class": 'citation'})[0]
    lines = [paragraph.contents[0] for paragraph in citation.contents]
    return xhtml2text('\n'.join(lines))