def get_imdb(self, e, urlposted=False): #reads title, rating, and movie description of movie titles
    """Look up a movie on IMDb and set e.output to "Title: ... - Rating: ... - summary".

    e.input is either a search term or (when urlposted=True) an IMDb URL.
    Returns e on success, None when scraping fails; leaves e untouched when
    no IMDb title URL could be resolved.
    NOTE(review): the tt\\d{7} pattern assumes 7-digit IMDb ids; newer ids
    can be 8 digits -- confirm before relying on this for recent titles.
    """
    searchterm = e.input
    if urlposted:
        url = searchterm
    else:
        # Resolve the search term to an IMDb title URL via a site-restricted Google search.
        url = tools.google_url("site:imdb.com/title " + searchterm, "imdb.com/title/tt\\d{7}/")
    title = ""
    if not url:
        pass
    elif url.find("imdb.com/title/tt") != -1:
        try:
            movietitle = ""
            rating = ""
            summary = ""
            # Canonicalize to http://www.imdb.com/title/tt<id>/ from whatever URL form we got.
            imdbid = re.search("tt\\d{7}", url)
            imdburl = ('http://www.imdb.com/title/' + imdbid.group(0) + '/')
            opener = urllib2.build_opener()
            # Spoofed User-Agent; Range header limits the download to the first 40KB.
            opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)"), ('Range', "bytes=0-40960")]
            pagetmp = opener.open(imdburl)
            page = BeautifulSoup(pagetmp.read(40960))
            opener.close()
            # Page <title> is "<movie> - IMDb" (or "IMDb - <movie>"); strip the site name.
            movietitle = tools.decode_htmlentities(
                tools.remove_html_tags(str(page.find('title'))).replace(
                    " - IMDb", ""))
            movietitle = movietitle.replace("IMDb - ", "")
            movietitle = "Title: " + movietitle
            # Narrow to the overview block, which holds the star rating and synopsis.
            if page.find(id="overview-top") != None:
                page = page.find(id="overview-top").extract()
            if page.find("div", "star-box-giga-star") != None:
                rating = tools.remove_html_tags(
                    str(page.find("div", "star-box-giga-star").text))
                rating = " - Rating: " + rating
            # With exactly two <p> tags present, the second is the plot summary.
            if len(page.findAll('p')) == 2:
                summary = str(page.findAll('p')[1])
                # Drop embedded links (e.g. "See full summary"), then all other markup.
                removelink = re.compile(r'\<a.*\/a\>')
                summary = removelink.sub('', summary)
                summary = tools.remove_html_tags(summary)
                summary = summary.replace('»', "")
                summary = tools.decode_htmlentities(
                    summary.decode("utf-8", 'ignore'))
                # Blank out any remaining HTML entities and flatten newlines.
                summary = re.sub("\&.*?\;", " ", summary)
                summary = summary.replace("\n", " ")
                summary = " - " + summary
            title = movietitle + rating + summary
            if not urlposted:
                # Only append the source URL when we found it ourselves.
                title = title + " [ %s ]" % url
            e.output = title.encode('utf-8', 'ignore')
            return e
        except Exception as inst:
            # Best-effort scraper: log the failure and signal "no result".
            print "!imdb " + searchterm + ": " + str(inst)
            return None
def advocate_beer(self, e):
    """Fetch the name, grade and style of a beer from beeradvocate.com.

    e.input is the search query; on success e.output is set to a one-line
    summary ("Beer: ... - Grade: ... Style: ... ABV: ...") and e is returned.
    Returns None when no profile page could be found or fetched.
    """
    query = e.input
    # Locate the beer's profile page via a site-restricted Google search.
    url = tools.google_url("site:beeradvocate.com " + query, "/beer/profile/[0-9]*?/[0-9]+")
    # BUGFIX: guard explicitly instead of letting urlopen(None) raise into the except.
    if not url:
        return None
    # Allow a slower fetch for this request only.  BUGFIX: the original left the
    # process-wide default at 30s when the fetch failed; restore it in finally.
    socket.setdefaulttimeout(30)
    try:
        beerpage = urllib.request.urlopen(url).read().decode("utf-8")
    except Exception:
        # Best-effort scraper: any fetch/decode failure means "no result".
        return None
    finally:
        socket.setdefaulttimeout(10)
    # Beer name: the <title> text up to the first " - " separator.
    titlestart = beerpage.find("<title>") + 7
    titleend = beerpage.find(" - ", titlestart)
    beertitle = beerpage[titlestart:titleend]
    # All score fields live in a 100-char window following the big-score span.
    score_start_tag = '<span class="BAscore_big">'
    start = beerpage.find(score_start_tag) + len(score_start_tag)
    score_line = beerpage[start : start + 100]
    find_start_tag = '</span>\n<br><a href="/help/index?topic=ratings"><b>'
    find_end_tag = "</b></a>\n<br>-<br>"
    grade = score_line[0 : score_line.find(find_start_tag)]
    grade_wording = score_line[score_line.find(find_start_tag) + len(find_start_tag) : score_line.rfind(find_end_tag)]
    if grade_wording == "":
        grade_wording = "N/A"
    find_start_tag = find_end_tag
    find_end_tag = "</td>"
    num_reviews = score_line[score_line.rfind(find_start_tag) + len(find_start_tag) : score_line.find(find_end_tag)]
    # Style and ABV sit in a fixed-size window after the "Style | ABV" header.
    header_tag = "Style | ABV"
    header_at = beerpage.find(header_tag)  # hoisted: was computed twice
    style_line = beerpage[header_at : header_at + 120]
    find_start_tag = "><b>"
    find_end_tag = "</b></a> | "
    style = style_line[style_line.find(find_start_tag) + len(find_start_tag) : style_line.find(find_end_tag)]
    find_start_tag = find_end_tag
    find_end_tag = "% <a href"
    # +1 keeps the trailing "%" on the ABV figure.
    abv = style_line[style_line.find(find_start_tag) + len(find_start_tag) : style_line.find(find_end_tag) + 1]
    response_string = "Beer: %s - Grade: %s [%s, %s] Style: %s ABV: %s [ %s ]" % (
        beertitle,
        grade,
        grade_wording,
        num_reviews,
        style,
        abv,
        tools.shorten_url(url),
    )
    e.output = response_string
    return e
def get_metacritic(self, e):
    """Look up e.input on metacritic.com and report title, category,
    Metascore and user score.

    Sets e.output when at least one score was found and returns e; returns
    None when the result page has no recognizable product title.
    """
    url = tools.google_url("site:metacritic.com " + e.input, "www.metacritic.com/")
    opener = urllib2.build_opener()
    # Spoofed browser User-Agent (matches the other scrapers in this class).
    opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)")]
    pagetmp = opener.open(url)
    page = pagetmp.read()
    opener.close()
    page = BeautifulSoup(page)
    try:
        titleDiv = page.findAll('div', attrs={"class": "product_title"})[0]
    except IndexError:
        # Page layout not recognized -- nothing to report.
        return
    title = titleDiv.a.string.encode("utf-8", 'ignore')
    titleUrl = titleDiv.a['href']
    # BUGFIX: category was previously unbound when the URL matched no known
    # section, raising UnboundLocalError at the "if category" check below.
    category = ""
    if titleUrl.find("game/") > 0:
        category = 'Game - '
        category += titleDiv.findAll('span', attrs={"class": "platform"})[0].a.string
    elif titleUrl.find("movie/") > 0:
        category = "Movie"
    elif titleUrl.find("tv/") > 0:
        category = "TV"
    elif titleUrl.find("music/") > 0:
        # (removed stray debug print that was here)
        category = "Music"
        # band name is here, append it to title
        title += " " + titleDiv.findAll('span', attrs={"class": "band_name"})[0].string
    if category:
        category = "(%s) " % category
    # declare these to avoid null reference
    metaScore = ""
    userScore = ""
    try:
        metaScoreDiv = page.findAll('div', attrs={"class": "metascore_wrap highlight_metascore"})[0]
        # BUGFIX: parse all three fields before committing metaScore, so a
        # partial parse can no longer leave metaDesc/metaNum unbound while
        # metaScore is already truthy (NameError in the formatting below).
        score = metaScoreDiv.findAll('span', attrs={"class": "score_value"})[0].string
        metaDesc = metaScoreDiv.findAll('span', attrs={"class": "desc"})[0].string
        metaNum = metaScoreDiv.findAll('span', attrs={"class": "count"})[0].a.span.string
        metaScore = score
    except (IndexError, AttributeError):
        pass
    try:
        userScoreDiv = page.findAll('div', attrs={"class": "userscore_wrap feature_userscore"})[0]
        score = userScoreDiv.findAll('span', attrs={"class": "score_value"})[0].string
        userDesc = userScoreDiv.findAll('span', attrs={"class": "desc"})[0].string
        userNum = userScoreDiv.find('span', attrs={"class": "count"}).a.string
        userScore = score
    except (IndexError, AttributeError):
        pass
    if metaScore:
        metaScore = "Metascore: " + metaScore
        metaScore += " out of 100 - %s (%s Reviews)" % (metaDesc.strip(), metaNum.strip())
        metaScore = "%s | " % metaScore
    if userScore:
        userScore = "User Score: " + userScore
        userScore += " out of 10 - %s (%s)" % (userDesc.strip(), userNum.strip())
    if metaScore or userScore:
        e.output = "%s %s| %s%s" % (title, category, metaScore, userScore)
    return e
def get_wiki(self, e, urlposted=False): #read the first paragraph of a wikipedia article
    """Fetch the lead paragraph of a Wikipedia article into e.output.

    e.input is a search term, a Wikipedia URL (urlposted=True), or "" for a
    random article.  Always returns e; e.output is "" when nothing was found.
    NOTE(review): this definition is shadowed by an identically-named method
    defined later in the file -- confirm which copy is intended to survive.
    """
    searchterm = e.input
    if urlposted:
        url = searchterm
    else:
        if searchterm == "":
            url = "http://en.wikipedia.org/wiki/Special:Random"
        else:
            url = tools.google_url("site:wikipedia.org " + searchterm,"wikipedia.org/wiki")
    title = ""
    # File: pages (images etc.) get their description instead of article text.
    if url and url.find("wikipedia.org/wiki/File:") != -1:
        file_title=get_wiki_file_description(url)
        if file_title:
            e.output = file_title
            return e
    if url and url.find("wikipedia.org/wiki/") != -1:
        try:
            opener = urllib2.build_opener()
            opener.addheaders = [('User-Agent',"Opera/9.10 (YourMom 8.0)")]
            pagetmp = opener.open(url)
            page = pagetmp.read()
            url = pagetmp.geturl()  # follow redirects to the canonical URL
            opener.close()
            # For a #section link, start parsing at that section's anchor.
            if url.find('#') != -1:
                anchor = url.split('#')[1]
                page = page[page.find('id="' + anchor):]
            page = BeautifulSoup(page)
            # Infoboxes/navboxes are <table>s that would pollute the first <p>.
            tables = page.findAll('table')
            for table in tables:
                table.extract()
            page = page.findAll('p')
            # Skip a leading coordinate/hatnote paragraph ("<p><span ...").
            if str(page[0])[0:9] == '<p><span ':
                page = unicode(page[1].extract())
            else:
                page = unicode(page[0].extract())
            title = tools.remove_html_tags(re.search('(?s)\<p\>(.*?)\<\/p\>',page).group(1))
            title = title.encode("utf-8", 'ignore')
            title = title.replace("<","");
            # Drop footnote markers like [1], [citation needed].
            rembracket = re.compile(r'\[.*?\]')
            title = rembracket.sub('',title)
            #title = re.sub("\&.*?\;", " ", title)
            title = title.replace("\n", " ")
            title = tools.decode_htmlentities(title.decode("utf-8", 'ignore')).encode("utf-8", 'ignore')
            # Truncate to 420 chars, then back to the last full sentence.
            title = title[0:420]
            if title.rfind(".")!=-1:
                title = title[0:title.rfind(".")+1]
            if not urlposted:
                url = tools.shorten_url(url)
                title = (title.decode('utf-8', 'ignore') + " [ %s ]" % url).encode('utf-8', 'ignore')
        except Exception as inst:
            print "!wiki " + searchterm + " : " + str(inst)
            # Fallback: crude regex over whatever `page` holds at failure time.
            title = tools.remove_html_tags(re.search('\<p\>(.*?\.) ',str(page)).group(1))
    e.output = title
    return e
def get_wiki(self, e, urlposted=False):  # read the first paragraph of a wikipedia article
    """Fetch the lead paragraph of a Wikipedia article into e.output.

    e.input is a search term, a Wikipedia URL (urlposted=True), or "" for a
    random article.  Always returns e; e.output is "" when nothing was found.
    NOTE(review): duplicate of an earlier identically-named method in this
    file; being defined later, this copy is the one Python keeps.
    """
    searchterm = e.input
    if urlposted:
        url = searchterm
    else:
        if searchterm == "":
            url = "http://en.wikipedia.org/wiki/Special:Random"
        else:
            url = tools.google_url("site:wikipedia.org " + searchterm, "wikipedia.org/wiki")
    title = ""
    # File: pages (images etc.) get their description instead of article text.
    if url and url.find("wikipedia.org/wiki/File:") != -1:
        file_title = get_wiki_file_description(url)
        if file_title:
            e.output = file_title
            return e
    if url and url.find("wikipedia.org/wiki/") != -1:
        try:
            opener = urllib2.build_opener()
            opener.addheaders = [("User-Agent", "Opera/9.10 (YourMom 8.0)")]
            pagetmp = opener.open(url)
            page = pagetmp.read()
            url = pagetmp.geturl()  # follow redirects to the canonical URL
            opener.close()
            # For a #section link, start parsing at that section's anchor.
            if url.find("#") != -1:
                anchor = url.split("#")[1]
                page = page[page.find('id="' + anchor) :]
            page = BeautifulSoup(page)
            # Infoboxes/navboxes are <table>s that would pollute the first <p>.
            tables = page.findAll("table")
            for table in tables:
                table.extract()
            page = page.findAll("p")
            # Skip a leading coordinate/hatnote paragraph ("<p><span ...").
            if str(page[0])[0:9] == "<p><span ":
                page = unicode(page[1].extract())
            else:
                page = unicode(page[0].extract())
            title = tools.remove_html_tags(re.search("(?s)\<p\>(.*?)\<\/p\>", page).group(1))
            title = title.encode("utf-8", "ignore")
            title = title.replace("<", "")
            # Drop footnote markers like [1], [citation needed].
            rembracket = re.compile(r"\[.*?\]")
            title = rembracket.sub("", title)
            # title = re.sub("\&.*?\;", " ", title)
            title = title.replace("\n", " ")
            title = tools.decode_htmlentities(title.decode("utf-8", "ignore")).encode("utf-8", "ignore")
            # Truncate to 420 chars, then back to the last full sentence.
            title = title[0:420]
            if title.rfind(".") != -1:
                title = title[0 : title.rfind(".") + 1]
            if not urlposted:
                url = tools.shorten_url(url)
                title = (title.decode("utf-8", "ignore") + " [ %s ]" % url).encode("utf-8", "ignore")
        except Exception as inst:
            print "!wiki " + searchterm + " : " + str(inst)
            # Fallback: crude regex over whatever `page` holds at failure time.
            title = tools.remove_html_tags(re.search("\<p\>(.*?\.) ", str(page)).group(1))
    e.output = title
    return e
def advocate_beer(self, e):
    """Look up a beer on beeradvocate.com; set e.output to its name, grade,
    style and ABV.

    e.input is the search query.  Returns e on success, None when the page
    could not be fetched.
    """
    # Resolve the query to a profile page via a site-restricted Google search.
    profile_url = tools.google_url("site:beeradvocate.com " + e.input, "/beer/profile/[0-9]*/")
    socket.setdefaulttimeout(30)
    try:
        markup = urllib2.urlopen(profile_url).read()
    except:
        return None
    socket.setdefaulttimeout(10)
    # Beer name: the <title> text up to the first " - " separator.
    name_at = markup.find("<title>") + 7
    beer_name = markup[name_at:markup.find(" - ", name_at)]
    # Grade, wording and review count all sit in a 50-char window that
    # immediately follows the big-score span.
    score_marker = '<span class="BAscore_big">'
    window_at = markup.find(score_marker) + len(score_marker)
    window = markup[window_at:window_at + 50]
    grade = window[:window.find("</span>\n<br>")]
    wording_at = window.find("</span>\n<br>") + len("</span>\n<br>")
    wording = window[wording_at:window.rfind("<br>")]
    reviews = window[window.rfind("<br>") + len("<br>"):window.find("</td>")]
    # Style and ABV live in a fixed 120-char window after the section header.
    header_at = markup.find("Style | ABV")
    info = markup[header_at:header_at + 120]
    style = info[info.find("><b>") + len("><b>"):info.find("</b></a> | ")]
    # The +1 keeps the trailing "%" on the ABV figure.
    abv_at = info.find("</b></a> | ") + len("</b></a> | ")
    abv = info[abv_at:info.find("% <a href") + 1]
    e.output = "Beer: %s - Grade: %s [%s, %s] Style: %s ABV: %s [ %s ]" % (
        beer_name, grade, wording, reviews, style, abv,
        tools.shorten_url(profile_url))
    return e