def fact():
    """- gets a random fact from OMGFACTS"""
    attempts = 0

    # all of this is because omgfacts is fail
    while True:
        try:
            soup = http.get_soup('http://www.omg-facts.com/random')
        except (http.HTTPError, http.URLError):
            if attempts > 2:
                return "Could not find a fact!"
            else:
                attempts += 1
                continue

        response = soup.find('a', {'class': 'surprise'})
        link = response['href']
        fact_data = ''.join(response.find(text=True))

        if fact_data:
            fact_data = fact_data.strip()
            break
        else:
            if attempts > 2:
                return "Could not find a fact!"
            else:
                attempts += 1
                continue

    url = web.try_shorten(link)

    return "{} - {}".format(fact_data, url)
def sptfy(text, sptfy=False):
    if sptfy:
        shortenurl = "http://sptfy.com/index.php"
        data = {
            "longUrl": text,
            "shortUrlDomain": 1,
            "submitted": 1,
            "shortUrlFolder": 6,
            "customUrl": "",
            "shortUrlPassword": "",
            "shortUrlExpiryDate": "",
            "shortUrlUses": 0,
            "shortUrlType": 0
        }
        try:
            soup = http.get_soup(shortenurl, data=data, cookies=True)
        except Exception:
            return text
        try:
            link = soup.find("div", {"class": "resultLink"}).text.strip()
            return link
        except Exception:
            message = "Unable to shorten URL: %s" % \
                soup.find("div", {"class": "messagebox_text"}).find("p").text.split("<br/>")[0]
            return message
    else:
        return web.try_shorten(text)
def get_gifs(url):
    soup = get_soup(url)
    container = soup.find('div', class_="row")
    gifs = [
        urljoin(url, elem["data-src"])
        for elem in container.find_all('img', {'data-src': True})
    ]
    return gifs
def refresh_cache():
    """gets a page of random MLIAs and puts them into the cache"""
    url = 'http://mylifeisaverage.com/{}'.format(random.randint(1, 11000))
    soup = http.get_soup(url)

    for story in soup.find_all('div', {'class': 'story '}):
        mlia_id = story.find('span', {'class': 'left'}).a.text
        mlia_text = story.find('div', {'class': 'sc'}).text.strip()
        mlia_cache.append((mlia_id, mlia_text))
def ytplaylist_url(match):
    location = match.group(4).split("=")[-1]
    try:
        soup = http.get_soup("https://www.youtube.com/playlist?list=" + location)
    except Exception:
        return "\x034\x02Invalid response."
    title = soup.find('title').text.split('-')[0].strip()
    author = soup.find('img', {'class': 'channel-header-profile-image'})['title']
    num_videos = soup.find('ul', {'class': 'header-stats'}).findAll('li')[0].text.split(' ')[0]
    views = soup.find('ul', {'class': 'header-stats'}).findAll('li')[1].text.split(' ')[0]
    return "\x02{}\x02 - \x02{}\x02 views - \x02{}\x02 videos - \x02{}\x02".format(title, views, num_videos, author)
def test_get_soup():
    test_data = """
    <html>
        <body>
            <div class="thing"><p>foobar</p></div>
        </body>
    </html>
    """
    with patch('cloudbot.util.http.get', lambda *a, **k: test_data):
        from cloudbot.util import http
        soup = http.get_soup('http://example.com')
        assert soup.find('div', {'class': "thing"}).p.text == "foobar"
def xkcd_search(term):
    search_term = http.quote_plus(term)
    soup = http.get_soup("http://www.ohnorobot.com/index.pl?s={}&Search=Search&"
                         "comic=56&e=0&n=0&b=0&m=0&d=0&t=0".format(search_term))
    result = soup.find('li')
    if result:
        url = result.find('div', {'class': 'tinylink'}).text
        xkcd_id = url[:-1].split("/")[-1]
        print(xkcd_id)
        return xkcd_info(xkcd_id, url=True)
    else:
        return "No results found!"
def hulu_search(text):
    """<query> - searches Hulu for <query>"""
    result = http.get_soup(
        "http://m.hulu.com/search?dp_identifier=hulu&{}&items_per_page=1&page=1".format(urlencode({'query': text})))
    data = result.find('results').find('videos').find('video')
    showname = data.find('show').find('name').text
    title = data.find('title').text
    duration = timeformat.format_time(int(float(data.find('duration').text)))
    description = data.find('description').text
    rating = data.find('content-rating').text
    return "{}: {} - {} - {} ({}) {}".format(showname, title, description, duration, rating,
                                             "http://www.hulu.com/watch/" + str(data.find('id').text))
def gelbooru_url(match):
    soup = http.get_soup('http://gelbooru.com/index.php?page=dapi&s=post&q=index&id={}'.format(match.group(1)))
    posts = soup.find_all('post')
    id, score, url, rating, tags = (posts[0].get('id'), posts[0].get('score'), posts[0].get('file_url'),
                                    posts[0].get('rating'), posts[0].get('tags'))
    if rating == 'e':
        rating = "\x02\x034NSFW\x03\x02"
    elif rating == 'q':
        rating = "\x02\x037Questionable\x03\x02"
    elif rating == 's':
        rating = "\x02\x033Safe\x03\x02"
    return u'\x02[{}]\x02 Score: \x02{}\x02 - Rating: {} - {} - {}'.format(id, score, rating, url, tags[:75].strip())

# http://gelbooru.com/index.php?page=post&s=list&tags=%3D_%3D
def hulu_search(inp):
    """hulu <search> - Search Hulu"""
    result = http.get_soup(
        "http://m.hulu.com/search?dp_identifier=hulu&{}&items_per_page=1&page=1".format(urlencode({"query": inp}))
    )
    data = result.find("results").find("videos").find("video")
    showname = data.find("show").find("name").text
    title = data.find("title").text
    duration = timeformat.timeformat(int(float(data.find("duration").text)))
    description = data.find("description").text
    rating = data.find("content-rating").text
    return "{}: {} - {} - {} ({}) {}".format(
        showname, title, description, duration, rating, "http://www.hulu.com/watch/" + str(data.find("id").text)
    )
def lyrics(text):
    """<search> - search AZLyrics.com for song lyrics"""
    if "pastelyrics" in text:
        dopaste = True
        text = text.replace("pastelyrics", "").strip()
    else:
        dopaste = False
    soup = http.get_soup(url + text.replace(" ", "+"))
    if "Try to compose less restrictive search query" in soup.find('div', {'id': 'inn'}).text:
        return "No results. Check spelling."
    div = None
    for i in soup.findAll('div', {'class': 'sen'}):
        if "/lyrics/" in i.find('a')['href']:
            div = i
            break
    if div:
        title = div.find('a').text
        link = div.find('a')['href']
        if dopaste:
            newsoup = http.get_soup(link)
            try:
                lyrics = newsoup.find('div', {'style': 'margin-left:10px;margin-right:10px;'}).text.strip()
                pasteurl = " " + web.paste(lyrics)
            except Exception as e:
                pasteurl = " (\x02Unable to paste lyrics\x02 [{}])".format(str(e))
        else:
            pasteurl = ""
        artist = div.find('b').text.title()
        lyricsum = div.find('div').text
        if "\r\n" in lyricsum.strip():
            lyricsum = " / ".join(lyricsum.strip().split("\r\n")[0:4])  # truncate, format
        else:
            lyricsum = " / ".join(lyricsum.strip().split("\n")[0:4])  # truncate, format
        return "\x02{}\x02 by \x02{}\x02 {}{} - {}".format(title, artist, web.try_shorten(link),
                                                           pasteurl, lyricsum[:-3])
    else:
        return "No song results. " + url + text.replace(" ", "+")
def refresh_cache(text):
    global gelbooru_cache
    gelbooru_cache = []
    num = 0
    search = text.replace(' ', '+').replace('explicit', 'rating:explicit').replace('nsfw', 'rating:explicit') \
        .replace('safe', 'rating:safe').replace('sfw', 'rating:safe')  # score:>100
    # print 'http://gelbooru.com/index.php?page=dapi&s=post&q=index&limit=20&tags={}'.format(search)
    soup = http.get_soup(u'http://gelbooru.com/index.php?page=dapi&s=post&q=index&limit=20&tags={}'.format(search))
    posts = soup.find_all('post')
    while num < len(posts):
        gelbooru_cache.append((posts[num].get('id'), posts[num].get('score'), posts[num].get('file_url'),
                               posts[num].get('rating'), posts[num].get('tags')))
        num += 1
    random.shuffle(gelbooru_cache)
    return
def steam(text, message):
    """<query> - Search for specified game/trailer/DLC"""
    params = {'term': text.strip().lower()}

    try:
        data = http.get_soup("http://store.steampowered.com/search/", params=params)
    except Exception as e:
        return "Could not get game info: {}".format(e)

    result = data.find('a', {'class': 'search_result_row'})

    if not result:
        return "No game found."

    app_id = result['data-ds-appid']
    message(format_game(app_id))
def twitch_lookup(location):
    locsplit = location.split("/")
    if len(locsplit) > 1 and len(locsplit) == 3:
        channel = locsplit[0]
        _type = locsplit[1]  # should be b or c
        _id = locsplit[2]
    else:
        channel = locsplit[0]
        _type = None
        _id = None
    fmt = "{}: {} playing {} ({})"  # Title: nickname playing Game (x views)
    if _type and _id:
        if _type == "b":  # I haven't found an API to retrieve broadcast info
            soup = http.get_soup("http://twitch.tv/" + location)
            title = soup.find('span', {'class': 'real_title js-title'}).text
            playing = soup.find('a', {'class': 'game js-game'}).text
            views = soup.find('span', {'id': 'views-count'}).text + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
        elif _type == "c":
            data = http.get_json("https://api.twitch.tv/kraken/videos/" + _type + _id)
            title = data['title']
            playing = data['game']
            views = str(data['views']) + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
    else:
        data = http.get_json("https://api.twitch.tv/kraken/streams?channel=" + channel)
        if data["streams"]:
            title = data["streams"][0]["channel"]["status"]
            playing = data["streams"][0]["game"]
            v = data["streams"][0]["viewers"]
            viewers = "\x033\x02Online now!\x02\x0f " + str(v) + " viewer" + ("s" if v != 1 else "")
            return html.unescape(fmt.format(title, channel, playing, viewers))
        else:
            try:
                data = http.get_json("https://api.twitch.tv/kraken/channels/" + channel)
            except Exception:
                return "Unable to get channel data. Maybe channel is on justin.tv instead of twitch.tv?"
            title = data['status']
            playing = data['game']
            viewers = "\x034\x02Offline\x02\x0f"
            return html.unescape(fmt.format(title, channel, playing, viewers))
def twitch_lookup(location):
    locsplit = location.split("/")
    if len(locsplit) > 1 and len(locsplit) == 3:
        channel = locsplit[0]
        type = locsplit[1]  # should be b or c
        id = locsplit[2]
    else:
        channel = locsplit[0]
        type = None
        id = None
    h = HTMLParser()
    fmt = "{}: {} playing {} ({})"  # Title: nickname playing Game (x views)
    if type and id:
        if type == "b":  # I haven't found an API to retrieve broadcast info
            soup = http.get_soup("http://twitch.tv/" + location)
            title = soup.find('span', {'class': 'real_title js-title'}).text
            playing = soup.find('a', {'class': 'game js-game'}).text
            views = soup.find('span', {'id': 'views-count'}).text + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return h.unescape(fmt.format(title, channel, playing, views))
        elif type == "c":
            data = http.get_json("https://api.twitch.tv/kraken/videos/" + type + id)
            title = data['title']
            playing = data['game']
            views = str(data['views']) + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return h.unescape(fmt.format(title, channel, playing, views))
    else:
        data = http.get_json("http://api.justin.tv/api/stream/list.json?channel=" + channel)
        if data and len(data) >= 1:
            data = data[0]
            title = data['title']
            playing = data['meta_game']
            viewers = "\x033\x02Online now!\x02\x0f " + str(data["channel_count"]) + " viewer"
            print(viewers)
            viewers = viewers + "s" if " 1 view" not in viewers else viewers
            print(viewers)
            return h.unescape(fmt.format(title, channel, playing, viewers))
        else:
            try:
                data = http.get_json("https://api.twitch.tv/kraken/channels/" + channel)
            except Exception:
                return
            title = data['status']
            playing = data['game']
            viewers = "\x034\x02Offline\x02\x0f"
            return h.unescape(fmt.format(title, channel, playing, viewers))
def sptfy(inp, sptfy=False):
    if sptfy:
        shortenurl = "http://sptfy.com/index.php"
        data = urlencode({'longUrl': inp, 'shortUrlDomain': 1, 'submitted': 1, "shortUrlFolder": 6,
                          "customUrl": "", "shortUrlPassword": "", "shortUrlExpiryDate": "",
                          "shortUrlUses": 0, "shortUrlType": 0})
        try:
            soup = http.get_soup(shortenurl, post_data=data, cookies=True)
        except Exception:
            return inp
        try:
            link = soup.find('div', {'class': 'resultLink'}).text.strip()
            return link
        except Exception:
            message = "Unable to shorten URL: {}".format(
                soup.find('div', {'class': 'messagebox_text'}).find('p').text.split("<br/>")[0])
            return message
    else:
        return web.try_shorten(inp)
def twitch_lookup(location):
    locsplit = location.split("/")
    if len(locsplit) > 1 and len(locsplit) == 3:
        channel = locsplit[0]
        _type = locsplit[1]  # should be b or c
        _id = locsplit[2]
    else:
        channel = locsplit[0]
        _type = None
        _id = None
    fmt = "{}: {} playing {} ({})"  # Title: nickname playing Game (x views)
    if _type and _id:
        if _type == "b":  # I haven't found an API to retrieve broadcast info
            soup = http.get_soup("http://twitch.tv/" + location)
            title = soup.find("span", {"class": "real_title js-title"}).text
            playing = soup.find("a", {"class": "game js-game"}).text
            views = soup.find("span", {"id": "views-count"}).text + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
        elif _type == "c":
            data = http.get_json("https://api.twitch.tv/kraken/videos/" + _type + _id)
            title = data["title"]
            playing = data["game"]
            views = str(data["views"]) + " view"
            views = views + "s" if not views[0:2] == "1 " else views
            return html.unescape(fmt.format(title, channel, playing, views))
    else:
        data = http.get_json("https://api.twitch.tv/kraken/streams?channel=" + channel)
        if data["streams"]:
            title = data["streams"][0]["channel"]["status"]
            playing = data["streams"][0]["game"]
            v = data["streams"][0]["viewers"]
            viewers = "\x033\x02Online now!\x02\x0f " + str(v) + " viewer" + ("s" if v != 1 else "")
            return html.unescape(fmt.format(title, channel, playing, viewers))
        else:
            try:
                data = http.get_json("https://api.twitch.tv/kraken/channels/" + channel)
            except Exception:
                return "Unable to get channel data. Maybe channel is on justin.tv instead of twitch.tv?"
            title = data["status"]
            playing = data["game"]
            viewers = "\x034\x02Offline\x02\x0f"
            return html.unescape(fmt.format(title, channel, playing, viewers))
def osrc(text):
    """<github user> - gets an Open Source Report Card for <github user> from osrc.dfm.io"""
    user_nick = text.strip()
    url = user_url.format(user_nick)

    try:
        soup = http.get_soup(url)
    except (http.HTTPError, http.URLError):
        return "Couldn't find any stats for this user."

    report = soup.find("div", {"id": "description"}).find("p").get_text()

    # Split and join to remove all the excess whitespace, slice the
    # string to remove the trailing full stop.
    report = " ".join(report.split())[:-1]

    short_url = web.try_shorten(url)

    return "{} - {}".format(report, short_url)
def recipe(text):
    """[term] - gets a recipe for [term], or gets a random recipe if no term is specified"""
    if text:
        # get the recipe URL by searching
        try:
            search = http.get_soup(search_url, query=text.strip())
        except (http.HTTPError, http.URLError) as e:
            return "Could not get recipe: {}".format(e)

        # find the list of results
        result_list = search.find('div', {'class': 'found_results'})

        if result_list:
            results = result_list.find_all('div', {'class': 'recipe_result'})
        else:
            return "No results"

        # pick a random front page result
        result = random.choice(results)

        # extract the URL from the result
        url = base_url + result.find('div', {'class': 'image-wrapper'}).find('a')['href']
    else:
        # get a random recipe URL
        try:
            page = http.open(random_url)
        except (http.HTTPError, http.URLError) as e:
            return "Could not get recipe: {}".format(e)
        url = page.geturl()

    # use get_data() to get the recipe info from the URL
    try:
        data = get_data(url)
    except ParseError as e:
        return "Could not parse recipe: {}".format(e)

    name = data.name.strip()

    return "Try eating \x02{}!\x02 - {}".format(name, web.try_shorten(url))
def horoscope(text, db, notice, nick):
    """<sign> - get your horoscope"""
    # check if the user asked us not to save their details
    dontsave = text.endswith(" dontsave")
    if dontsave:
        sign = text[:-9].strip().lower()
    else:
        sign = text

    db.execute("create table if not exists horoscope(nick primary key, sign)")

    if not sign:
        sign = db.execute("select sign from horoscope where "
                          "nick=lower(:nick)", {'nick': nick}).fetchone()
        if not sign:
            notice("horoscope <sign> -- Get your horoscope")
            return
        sign = sign[0]

    url = "http://my.horoscope.com/astrology/free-daily-horoscope-{}.html".format(sign)
    soup = http.get_soup(url)

    title = soup.find_all('h1', {'class': 'h1b'})[1]
    horoscope_text = soup.find('div', {'class': 'fontdef1'})
    result = "\x02{}\x02 {}".format(title, horoscope_text)
    result = formatting.strip_html(result)
    # result = unicode(result, "utf8").replace('flight ','')

    if not title:
        return "Could not get the horoscope for {}.".format(text)

    if text and not dontsave:
        db.execute("insert or replace into horoscope(nick, sign) values (:nick, :sign)",
                   {'nick': nick.lower(), 'sign': sign})
        db.commit()

    return result
def gelbooru_list_url(match):
    soup = http.get_soup(match.group(1))
    return u'{}'.format(soup.find('title').text)