def google_news (self, e):
    """Fetch the newest Google News headline into e.output.

    e.input is an optional search term; empty input falls back to the
    top US headlines feed. Returns e with output set to
    "<title> - <description> [ <short url> ]".
    """
    query = urllib.quote(e.input)
    url = ""
    if not query:
        # no search term: use the top-stories RSS feed
        url = "http://news.google.com/news?ned=us&topic=h&output=rss"
    else:
        url = "http://news.google.com/news?q=%s&output=rss" % query
    dom = xml.dom.minidom.parse(urllib2.urlopen(url))
    newest_news = dom.getElementsByTagName('item')[0]
    title = newest_news.getElementsByTagName('title')[0].childNodes[0].data
    description = BeautifulSoup(newest_news.getElementsByTagName('description')[0].childNodes[0].data)
    # strip anchor tags and the grey (#6f6f6f) byline markup from the blurb
    links = description.findAll('a')
    for link in links:
        link.extract()
    links = description.findAll(color='#6f6f6f')
    for link in links:
        link.extract()
    description = str(description).strip().decode("utf-8", 'ignore')
    description = tools.remove_html_tags(description)
    description = tools.decode_htmlentities(description)
    # drop the trailing 9-character suffix, then cut at the last full stop
    description = description[0:len(description) - 9]
    if description.rfind(".")!=-1:
        description = description[0:description.rfind(".")+1]
    link = tools.shorten_url(newest_news.getElementsByTagName('link')[0].childNodes[0].data)
    e.output = "%s - %s [ %s ]" % (title.encode("utf-8", 'ignore'), description.encode("utf-8", 'ignore'), link.encode("utf-8", 'ignore'))
    return e
def google_news(self, e):
    """Look up the most recent Google News item and write a one-line
    summary ("<title> - <description> [ <short url> ]") to e.output.

    An empty e.input selects the top US headlines feed instead of a
    query-specific feed.
    """
    encoded = urllib.quote(e.input)
    if encoded:
        feed = "http://news.google.com/news?q=%s&output=rss" % encoded
    else:
        feed = "http://news.google.com/news?ned=us&topic=h&output=rss"
    tree = xml.dom.minidom.parse(urllib2.urlopen(feed))
    item = tree.getElementsByTagName('item')[0]
    headline = item.getElementsByTagName('title')[0].childNodes[0].data
    blurb = BeautifulSoup(item.getElementsByTagName('description')[0].childNodes[0].data)
    # remove anchors and the grey source/byline markup before flattening to text
    for node in blurb.findAll('a'):
        node.extract()
    for node in blurb.findAll(color='#6f6f6f'):
        node.extract()
    text = str(blurb).strip().decode("utf-8", 'ignore')
    text = tools.remove_html_tags(text)
    text = tools.decode_htmlentities(text)
    # drop the trailing 9-character suffix, then cut at the last full stop
    text = text[0:len(text) - 9]
    stop = text.rfind(".")
    if stop != -1:
        text = text[0:stop + 1]
    short = tools.shorten_url(item.getElementsByTagName('link')[0].childNodes[0].data)
    e.output = "%s - %s [ %s ]" % (headline.encode("utf-8", 'ignore'),
                                   text.encode("utf-8", 'ignore'),
                                   short.encode("utf-8", 'ignore'))
    return e
def gwiki(bot, e):
    """Search Google's AJAX API for a Wikipedia page matching e.input.

    When the top result is a wikipedia.org/wiki/ URL, e.output is set to
    "<snippet> [ <short url> ]"; otherwise e.output is left untouched.
    Returns e either way.
    """
    url = ('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:wikipedia.org+'
           + urllib.parse.quote(e.input))
    # the AJAX search API requires a Referer header
    request = urllib.request.Request(url, None, {'Referer': 'http://irc.00id.net'})
    response = urllib.request.urlopen(request)
    results_json = json.loads(response.read().decode('utf-8'))
    results = results_json['responseData']['results']
    result = results[0]
    # NOTE: the "." chars are unescaped regex wildcards; harmless in
    # practice because results are already site-restricted to wikipedia.org
    if re.search("wikipedia.org/wiki/", result['url']):
        url = result['url']
        # the API double-escapes percent signs; undo that before shortening
        url = tools.shorten_url(url.replace('%25', '%'))
        content = tools.decode_htmlentities(
            tools.remove_html_tags(result['content']))
        content = re.sub(r'\s+', ' ', content)  # collapse whitespace runs
        content = content.replace("...", "")
        e.output = "%s [ %s ]" % (content, url)
    return e
def get_rt(self, e):
    """Look up e.input on the Rotten Tomatoes API and set e.output to a
    one-line summary with critic/audience scores and a shortened link."""
    # single-result query against the RT public API
    url = "http://api.rottentomatoes.com/api/public/v1.0/movies.json?apikey=%s&q=%s&page_limit=1" % (tools.config.rtAPIkey, urllib2.quote(e.input) )
    response = urllib2.urlopen(url).read()
    movie = json.loads(response)
    movie = movie['movies'][0]
    concensus = ""
    # the consensus field is optional in the API payload
    if 'critics_consensus' in movie:
        concensus = "- " + movie['critics_consensus']
    url = tools.shorten_url(movie['links']['alternate'])
    e.output = "%s (%s) - Critics: %s - Users: %s %s [ %s ]" % (movie['title'], str(movie['year']), str(movie['ratings']['critics_score']), str(movie['ratings']['audience_score']), concensus, url )
    return e
def get_rt(self, e):
    """Query the Rotten Tomatoes API for e.input and write a rating
    summary line (title, year, scores, consensus, short link) to e.output."""
    api_url = ("http://api.rottentomatoes.com/api/public/v1.0/movies.json"
               "?apikey=%s&q=%s&page_limit=1") % (tools.config.rtAPIkey,
                                                  urllib2.quote(e.input))
    data = json.loads(urllib2.urlopen(api_url).read())
    top = data['movies'][0]
    # consensus text is optional in the payload
    consensus_part = "- " + top['critics_consensus'] if 'critics_consensus' in top else ""
    short = tools.shorten_url(top['links']['alternate'])
    ratings = top['ratings']
    e.output = "%s (%s) - Critics: %s - Users: %s %s [ %s ]" % (
        top['title'], str(top['year']), str(ratings['critics_score']),
        str(ratings['audience_score']), consensus_part, short)
    return e
def get_weather_alert_data(alert_url): try: request = urllib2.urlopen(alert_url) dom = xml.dom.minidom.parse(request) msgType = dom.getElementsByTagName('msgType')[0].childNodes[0].data note = dom.getElementsByTagName('description')[0].childNodes[0].data note = note.replace("\n", " ") pattern = re.compile("\s+") note = pattern.sub(" ", note) ##turning off the text for now because its too much spam note = "" event = dom.getElementsByTagName('event')[0].childNodes[0].data urgency = dom.getElementsByTagName('urgency')[0].childNodes[0].data severity = dom.getElementsByTagName('severity')[0].childNodes[0].data certainty = dom.getElementsByTagName('certainty')[0].childNodes[0].data senderName = dom.getElementsByTagName( 'senderName')[0].childNodes[0].data ## Use the "effective" value because "sent" changes every time ## the document is retrieved updated = dom.getElementsByTagName('effective')[0].childNodes[0].data updated = dateparser(updated) updated = (updated - updated.utcoffset()).replace(tzinfo=None) ago = (datetime.datetime.utcnow() - updated).seconds / 60 short_url = tools.shorten_url(alert_url) ## old text, too verbose ##alert_text = "[%s] %s: %s Urgency: %s Severity: %s Certainty: %s | %s (%s minutes ago)" % (senderName, msgType, event, urgency, severity, certainty, note[0:170], ago) ## new text is self limiting to the IRC limit of 428 characters alert_text_start = "[%s] %s: %s" % (senderName, msgType, event) alert_text_end = "(%s minutes ago) [ %s ]" % (ago, short_url) alert_text = "%s | %s %s" % (alert_text_start, note[:425 - ( len(alert_text_start + alert_text_end))], alert_text_end) return alert_text except Exception as inst: print "get_weather_alert_data: " + str(inst) pass
def get_weather_alert_data(alert_url):
    """Fetch a CAP weather-alert XML document and build a one-line,
    IRC-sized summary string; returns None on any error (printed, not raised)."""
    try:
        request = urllib2.urlopen(alert_url)
        dom = xml.dom.minidom.parse(request)
        msgType = dom.getElementsByTagName('msgType')[0].childNodes[0].data
        note = dom.getElementsByTagName('description')[0].childNodes[0].data
        note = note.replace("\n"," ")
        pattern = re.compile("\s+")
        note = pattern.sub(" ", note)
        ##turning off the text for now because its too much spam
        note = ""
        event = dom.getElementsByTagName('event')[0].childNodes[0].data
        urgency = dom.getElementsByTagName('urgency')[0].childNodes[0].data
        severity = dom.getElementsByTagName('severity')[0].childNodes[0].data
        certainty = dom.getElementsByTagName('certainty')[0].childNodes[0].data
        senderName = dom.getElementsByTagName('senderName')[0].childNodes[0].data
        ## Use the "effective" value because "sent" changes every time
        ## the document is retrieved
        updated = dom.getElementsByTagName('effective')[0].childNodes[0].data
        updated = dateparser(updated)
        # normalize to naive UTC so it can be compared with utcnow()
        updated = (updated - updated.utcoffset()).replace(tzinfo=None)
        # NOTE(review): timedelta.seconds wraps for alerts older than one
        # day; total_seconds() would be correct — confirm intent.
        ago = (datetime.datetime.utcnow() - updated).seconds/60
        short_url = tools.shorten_url(alert_url)
        ## old text, too verbose
        ##alert_text = "[%s] %s: %s Urgency: %s Severity: %s Certainty: %s | %s (%s minutes ago)" % (senderName, msgType, event, urgency, severity, certainty, note[0:170], ago)
        ## new text is self limiting to the IRC limit of 428 characters
        alert_text_start = "[%s] %s: %s" % (senderName, msgType, event)
        alert_text_end = "(%s minutes ago) [ %s ]" % (ago, short_url)
        alert_text = "%s | %s %s" % (alert_text_start, note[:425-(len(alert_text_start+alert_text_end))], alert_text_end)
        return alert_text
    except Exception as inst:
        print "get_weather_alert_data: " + str(inst)
        pass
def get_urbandictionary_wotd(): url = "http://www.urbandictionary.com" try: opener = urllib2.build_opener() opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)")] pagetmp = opener.open(url) page = pagetmp.read() opener.close() page = BeautifulSoup(page) first_definition = "" first_word = page.findAll('div', attrs={"class": "word" })[0].contents[1].contents[0].string first_word = first_word.encode("utf-8", 'ignore') for content in page.findAll('div', attrs={"class": "definition"})[0].contents: if content.string != None: first_definition += content.string first_definition = first_definition.encode("utf-8", 'ignore') first_definition = tools.decode_htmlentities( first_definition.decode("utf-8", 'ignore')).encode("utf-8", 'ignore') first_definition = first_definition.replace("\n", " ") wotd = (first_word.decode('utf-8') + ": " + first_definition.decode('utf-8') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore') return wotd except: print "!ud wotd went wrong" return
def gwiki(bot, e):
    """Find a Wikipedia article for e.input via Google's AJAX search API.

    Sets e.output to "<snippet> [ <short url> ]" when the top hit is a
    wikipedia.org/wiki/ page; otherwise e.output is left unchanged.
    """
    url = ('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:wikipedia.org+' +
           urllib.parse.quote(e.input))
    # the AJAX search API requires a Referer header
    request = urllib.request.Request(url, None, {'Referer': 'http://irc.00id.net'})
    response = urllib.request.urlopen(request)
    results_json = json.loads(response.read().decode('utf-8'))
    results = results_json['responseData']['results']
    regexstring = "wikipedia.org/wiki/"
    result = results[0]
    m = re.search(regexstring,result['url'])
    if (m):
        url = result['url']
        # undo the API's double percent-escaping before shortening
        url = tools.shorten_url(url.replace('%25','%'))
        #content = result['content'].encode('utf-8')
        content = tools.decode_htmlentities(tools.remove_html_tags(result['content']))
        # collapse whitespace runs into single spaces
        content = re.sub('\s+', ' ', content)
        content = content.replace("...", "")
        #print content
        #content = content.decode('unicode-escape')
        #e.output = content
        e.output = "%s [ %s ]" % (content, url)
    return e
def advocate_beer(self, e): query = e.input # get the name, rating and style of a beer from beeradvocate.com url = tools.google_url("site:beeradvocate.com " + query, "/beer/profile/[0-9]*?/[0-9]+") # url = "http://beeradvocate.com/beer/profile/306/1212/" socket.setdefaulttimeout(30) try: beerpage = urllib.request.urlopen(url).read().decode("utf-8") except: return None socket.setdefaulttimeout(10) titlestart = beerpage.find("<title>") + 7 titleend = beerpage.find(" - ", titlestart) beertitle = beerpage[titlestart:titleend] score_start_tag = '<span class="BAscore_big">' score_end_tag = "Reviews</td>" start = beerpage.find(score_start_tag) + len(score_start_tag) score_line = beerpage[start : start + 100] find_start_tag = '</span>\n<br><a href="/help/index?topic=ratings"><b>' find_end_tag = "</b></a>\n<br>-<br>" # print score_line grade = score_line[0 : score_line.find(find_start_tag)] # print "\n" + grade grade_wording = score_line[score_line.find(find_start_tag) + len(find_start_tag) : score_line.rfind(find_end_tag)] # print grade_wording if grade_wording == "": grade_wording = "N/A" find_start_tag = find_end_tag find_end_tag = "</td>" num_reviews = score_line[score_line.rfind(find_start_tag) + len(find_start_tag) : score_line.find(find_end_tag)] # print num_reviews find_start_tag = "Style | ABV" style_line = beerpage[beerpage.find(find_start_tag) : beerpage.find(find_start_tag) + 120] find_start_tag = "><b>" find_end_tag = "</b></a> | " style = style_line[style_line.find(find_start_tag) + len(find_start_tag) : style_line.find(find_end_tag)] find_start_tag = find_end_tag find_end_tag = "% <a href" abv = style_line[style_line.find(find_start_tag) + len(find_start_tag) : style_line.find(find_end_tag) + 1] response_string = "Beer: %s - Grade: %s [%s, %s] Style: %s ABV: %s [ %s ]" % ( beertitle, grade, grade_wording, num_reviews, style, abv, tools.shorten_url(url), ) e.output = response_string return e
def get_urbandictionary_wotd():
    """Scrape Urban Dictionary's front page for the word of the day.

    Returns a UTF-8 encoded "word: definition [ url ]" string, or None on
    any failure.
    """
    url = "http://www.urbandictionary.com"
    try:
        opener = urllib2.build_opener()
        # spoofed user agent; UD rejects the urllib default
        opener.addheaders = [('User-Agent',"Opera/9.10 (YourMom 8.0)")]
        pagetmp = opener.open(url)
        page = pagetmp.read()
        opener.close()
        page = BeautifulSoup(page)
        first_definition = ""
        first_word = page.findAll('div',attrs={"class" : "word"})[0].contents[1].contents[0].string
        first_word = first_word.encode("utf-8", 'ignore')
        # the definition text is split over several child nodes; join the
        # plain-text ones
        for content in page.findAll('div',attrs={"class" : "definition"})[0].contents:
            if content.string != None:
                first_definition += content.string
        first_definition = first_definition.encode("utf-8", 'ignore')
        first_definition = tools.decode_htmlentities(first_definition.decode("utf-8", 'ignore')).encode("utf-8", 'ignore')
        first_definition = first_definition.replace("\n", " ")
        wotd = (first_word.decode('utf-8') + ": " + first_definition.decode('utf-8') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore')
        return wotd
    except:
        print "!ud wotd went wrong"
        return
def get_urbandictionary(self, e):
    """Fetch the top Urban Dictionary definition for e.input into e.output.

    Special cases: "wotd" delegates to the word-of-the-day scraper; an
    empty term pulls a random word. Returns None when the term has no
    definition or scraping fails.
    """
    searchterm = e.input
    url = "http://www.urbandictionary.com/define.php?term=%s" % urllib2.quote(searchterm)
    if searchterm=="wotd":
        e.output = get_urbandictionary_wotd()
        return e
    if searchterm== "":
        url = "http://www.urbandictionary.com/random.php"
    try:
        opener = urllib2.build_opener()
        # spoofed user agent; UD rejects the urllib default
        opener.addheaders = [('User-Agent',"Opera/9.10 (YourMom 8.0)")]
        pagetmp = opener.open(url)
        page = pagetmp.read()
        # random.php redirects, so capture the final URL for the output line
        url = pagetmp.geturl()
        opener.close()
        page = BeautifulSoup(page)
        first_definition= ""
        if page.find(id='not_defined_yet') != None:
            return None
        ## depending on the search results the first word may be contained directly under the <td class='word'> tag
        ## or it may be the text contents of a <a href> tag
        ## we first try to get it from inside a <td><a href>[word]</a></td> type structure
        ## if that fails, get the word under the initial <td> tag
        try:
            first_word = page.findAll('td',attrs={"class" : "word"})[0].contents[1].string
        except:
            first_word = page.findAll('td',attrs={"class" : "word"})[0].contents[0].string
        first_word = first_word.replace("\n","")
        #first_word = first_word.encode("utf-8", 'ignore')
        for content in page.findAll('div',attrs={"class" : "definition"})[0].contents:
            if content.string != None:
                first_definition += content.string
        #first_definition = first_definition.encode("utf-8", 'ignore')
        first_definition = tools.decode_htmlentities(first_definition).encode("utf-8", 'ignore')
        first_word = tools.decode_htmlentities(first_word).encode("utf-8", 'ignore')
        first_definition = first_definition.replace("\n", " ")
        first_definition = first_definition.replace("\r", " ")
        # trim so the final line fits in an IRC message
        first_definition = first_definition[0:392]
        first_definition = ((first_word + ": " + first_definition).decode("utf-8", 'ignore') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore')
        #print first_definition
        e.output = first_definition
        return e
    except:
        print "!ud %s went wrong" % searchterm
        return
def get_wiki(self, e, urlposted=False): # read the first paragraph of a wikipedia article searchterm = e.input if urlposted: url = searchterm else: if searchterm == "": url = "http://en.wikipedia.org/wiki/Special:Random" else: url = tools.google_url("site:wikipedia.org " + searchterm, "wikipedia.org/wiki") title = "" if url and url.find("wikipedia.org/wiki/File:") != -1: file_title = get_wiki_file_description(url) if file_title: e.output = file_title return e if url and url.find("wikipedia.org/wiki/") != -1: try: opener = urllib2.build_opener() opener.addheaders = [("User-Agent", "Opera/9.10 (YourMom 8.0)")] pagetmp = opener.open(url) page = pagetmp.read() url = pagetmp.geturl() opener.close() if url.find("#") != -1: anchor = url.split("#")[1] page = page[page.find('id="' + anchor) :] page = BeautifulSoup(page) tables = page.findAll("table") for table in tables: table.extract() page = page.findAll("p") if str(page[0])[0:9] == "<p><span ": page = unicode(page[1].extract()) else: page = unicode(page[0].extract()) title = tools.remove_html_tags(re.search("(?s)\<p\>(.*?)\<\/p\>", page).group(1)) title = title.encode("utf-8", "ignore") title = title.replace("<", "") rembracket = re.compile(r"\[.*?\]") title = rembracket.sub("", title) # title = re.sub("\&.*?\;", " ", title) title = title.replace("\n", " ") title = tools.decode_htmlentities(title.decode("utf-8", "ignore")).encode("utf-8", "ignore") title = title[0:420] if title.rfind(".") != -1: title = title[0 : title.rfind(".") + 1] if not urlposted: url = tools.shorten_url(url) title = (title.decode("utf-8", "ignore") + " [ %s ]" % url).encode("utf-8", "ignore") except Exception as inst: print "!wiki " + searchterm + " : " + str(inst) title = tools.remove_html_tags(re.search("\<p\>(.*?\.) ", str(page)).group(1)) e.output = title return e
def advocate_beer(self, e): query = e.input #get the name, rating and style of a beer from beeradvocate.com url = tools.google_url("site:beeradvocate.com " + query, "/beer/profile/[0-9]*/") #url = "http://beeradvocate.com/beer/profile/306/1212/" socket.setdefaulttimeout(30) try: beerpage = urllib2.urlopen(url).read() #.decode("ISO-8859-1") except: return None socket.setdefaulttimeout(10) titlestart = beerpage.find("<title>") + 7 titleend = beerpage.find(" - ", titlestart) beertitle = beerpage[titlestart:titleend] score_start_tag = '<span class="BAscore_big">' score_end_tag = 'Reviews</td>' start = beerpage.find(score_start_tag) + len(score_start_tag) score_line = beerpage[start:start + 50] find_start_tag = "</span>\n<br>" find_end_tag = "<br>" #print score_line grade = score_line[0:score_line.find(find_start_tag)] #print "\n" + grade grade_wording = score_line[score_line.find(find_start_tag) + len(find_start_tag):score_line. rfind(find_end_tag)] #print grade_wording find_start_tag = find_end_tag find_end_tag = "</td>" num_reviews = score_line[score_line.rfind(find_start_tag) + len(find_start_tag):score_line.find(find_end_tag)] #print num_reviews find_start_tag = "Style | ABV" style_line = beerpage[beerpage.find(find_start_tag ):beerpage.find(find_start_tag) + 120] find_start_tag = "><b>" find_end_tag = "</b></a> | " style = style_line[style_line.find(find_start_tag) + len(find_start_tag):style_line.find(find_end_tag)] find_start_tag = find_end_tag find_end_tag = "% <a href" abv = style_line[style_line.find(find_start_tag) + len(find_start_tag):style_line.find(find_end_tag) + 1] response_string = "Beer: %s - Grade: %s [%s, %s] Style: %s ABV: %s [ %s ]" % ( beertitle, grade, grade_wording, num_reviews, style, abv, tools.shorten_url(url)) e.output = response_string return e
def get_urbandictionary(self, e): searchterm = e.input url = "http://www.urbandictionary.com/define.php?term=%s" % urllib2.quote( searchterm) if searchterm == "wotd": e.output = get_urbandictionary_wotd() return e if searchterm == "": url = "http://www.urbandictionary.com/random.php" try: opener = urllib2.build_opener() opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)")] pagetmp = opener.open(url) page = pagetmp.read() url = pagetmp.geturl() opener.close() page = BeautifulSoup(page) first_definition = "" if page.find(id='not_defined_yet') != None: return None ## depending on the search results the first word may be contained directly under the <td class='word'> tag ## or it may be the text contents of a <a href> tag ## we first try to get it from inside a <td><a href>[word]</a></td> type structure ## if that fails, get the word under the initial <td> tag try: first_word = page.findAll('td', attrs={"class": "word"})[0].contents[1].string except: first_word = page.findAll('td', attrs={"class": "word"})[0].contents[0].string first_word = first_word.replace("\n", "") #first_word = first_word.encode("utf-8", 'ignore') for content in page.findAll('div', attrs={"class": "definition"})[0].contents: if content.string != None: first_definition += content.string #first_definition = first_definition.encode("utf-8", 'ignore') first_definition = tools.decode_htmlentities(first_definition).encode( "utf-8", 'ignore') first_word = tools.decode_htmlentities(first_word).encode( "utf-8", 'ignore') first_definition = first_definition.replace("\n", " ") first_definition = first_definition.replace("\r", " ") first_definition = first_definition[0:392] first_definition = ( (first_word + ": " + first_definition).decode("utf-8", 'ignore') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore') #print first_definition e.output = first_definition return e except: print "!ud %s went wrong" % searchterm return