def personal_station(self, location):
    """Get current conditions from a personal weather station (PWS).

    location may be a PWS id (e.g. "KCASANFR1"-style) or a free-form
    query that is resolved to the nearest station.  Returns the rendered
    weather string, or an error message on any failure.

    NOTE(review): the local variable names below (loc, time, conditions,
    temp, tempstr, humidity, wind) are the de-facto interface -- locals()
    is passed to _format_weather(), which presumably reads these keys.
    Do not rename them without checking _format_weather.
    """
    try:
        if re.match("[A-Z]{8}\d+", location):  # already a PWSid
            pws_id = location
        else:
            # resolve free-form location to the first PWS in the result
            page = geturl(url=self.locate_url, opts={u"query": location},
                          referer=self.base_url).encode("utf-8")
            xml = ElementTree.fromstring(page)
            pws_id = xml.find(".//pws/station[1]/id").text
        # fetch current conditions for the station
        page = geturl(url=self.pws_url, opts={u"ID": pws_id}).encode("utf-8")
        xml = ElementTree.fromstring(page)
        loc = xml.find("location/full").text
        time = xml.find("observation_time_rfc822").text
        conditions = "N/A"  # PWS feed has no textual conditions field
        temp = float(xml.find("temp_f").text)
        tempstr = xml.find("temperature_string").text
        humidity = xml.find("relative_humidity").text + "%"
        wind = xml.find("wind_string").text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u"error in module %s" % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                  referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        # landed on the search results page: hunt for an exact title match
        container = soup.body.find('div', id='main')
        wanted = self.normalize(name)
        sections = ('Titles (Exact Matches)', 'Popular Titles',
                    'Titles (Partial Matches)')
        match_url = None
        for para in container('p'):
            if para.b is None or para.b.renderContents() not in sections:
                continue
            for anchor in para('a'):
                label = anchor.renderContents()
                if label and self.normalize(label) == wanted:
                    match_url = urljoin(self.imdb_url, anchor['href'])
                    break
            if match_url:
                break
        else:
            raise ValueError('no exact matches')
        soup = BeautifulSoup(geturl(match_url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def personal_station(self, location):
    '''Get current conditions from a personal weather station (PWS).

    location may already be a PWS id, or a free-form query that is
    resolved to the nearest station.  Returns the rendered weather
    string, or an error message on any failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    '''
    try:
        if re.match('[A-Z]{8}\d+', location):  # already a PWSid
            pws_id = location
        else:
            # resolve free-form location to the first PWS in the result
            page = geturl(url=self.locate_url, opts={u'query':location},
                          referer=self.base_url).encode('utf-8')
            xml = ElementTree.fromstring(page)
            pws_id = xml.find('.//pws/station[1]/id').text
        # fetch current conditions for the station
        page = geturl(url=self.pws_url, opts={u'ID':pws_id}).encode('utf-8')
        xml = ElementTree.fromstring(page)
        loc = xml.find('location/full').text
        time = xml.find('observation_time_rfc822').text
        conditions = 'N/A'  # PWS feed has no textual conditions field
        temp = float(xml.find('temp_f').text)
        tempstr = xml.find('temperature_string').text
        humidity = xml.find('relative_humidity').text + '%'
        wind = xml.find('wind_string').text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def rate_imdb(self, name):
    """Get user rating from IMDB"""

    def find_title_url(soup, wanted):
        # scan the result sections for a link whose text normalizes to wanted
        headers = ('Titles (Exact Matches)', 'Popular Titles',
                   'Titles (Partial Matches)')
        for p in soup.body.find('div', id='main')('p'):
            if p.b is not None and p.b.renderContents() in headers:
                for a in p('a'):
                    text = a.renderContents()
                    if text and self.normalize(text) == wanted:
                        return urljoin(self.imdb_url, a['href'])
        raise ValueError('no exact matches')

    page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                  referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        # search page instead of a direct hit: locate the exact match
        url = find_title_url(soup, self.normalize(name))
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def slutrating(phrase):
    """Return the "sluttiness" of phrase as a float in [0, 1].

    Runs the phrase through search with safe-search off and on; the
    score is the fraction of results lost to filtering.  Raises
    WordFiltered if the safe search reports a blocked word.  Returns 0
    when no unfiltered results are found.
    """
    phrase = cleanurl(phrase)
    # result count with safe search off
    try:
        data = geturl(searchURL, opts={u'q': phrase, u'safe': u'off'})
        unsafe = int(match_re.search(data).group(1).replace(u',', u''))
    except AttributeError:
        unsafe = 0
    # result count with safe search on
    try:
        data = geturl(searchURL, opts={u'q': phrase, u'safe': u'active'})
        try:
            # a matched filter notice aborts with WordFiltered
            filtered = filter_re.search(data).group(1)
            raise WordFiltered(filtered)
        except AttributeError:
            pass
        safe = int(match_re.search(data).group(1).replace(u',', u''))
    except AttributeError:
        safe = 0
    # BUG FIX: previously divided by zero when unsafe == 0
    if not unsafe:
        return 0
    value = float(unsafe - safe) / float(unsafe)
    if value < 0:
        value = 0
    return value
def personal_station(self, location):
    '''Get current conditions from a personal weather station (PWS).

    location may already be a PWS id, or a free-form query that is
    resolved to the nearest station.  Returns the rendered weather
    string, or an error message on any failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    '''
    try:
        if re.match('[A-Z]{8}\d+', location):  # already a PWSid
            pws_id = location
        else:
            # resolve free-form location to the first PWS in the result
            page = geturl(url=self.locate_url, opts={
                u'query': location
            }, referer=self.base_url).encode('utf-8')
            xml = ElementTree.fromstring(page)
            pws_id = xml.find('.//pws/station[1]/id').text
        # fetch current conditions for the station
        page = geturl(url=self.pws_url, opts={
            u'ID': pws_id
        }).encode('utf-8')
        xml = ElementTree.fromstring(page)
        loc = xml.find('location/full').text
        time = xml.find('observation_time_rfc822').text
        conditions = 'N/A'  # PWS feed has no textual conditions field
        temp = float(xml.find('temp_f').text)
        tempstr = xml.find('temperature_string').text
        humidity = xml.find('relative_humidity').text + '%'
        wind = xml.find('wind_string').text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    opts = {'s': 'tt', 'q': name, 'exact': 'true'}
    soup = BeautifulSoup(geturl(self.imdb_search, opts, referer=self.imdb_url))
    if soup.title.renderContents() == 'Find - IMDb':
        # search results page: follow the first hit in the find-results table
        hit = soup.body.find('table', 'findList').tr.find('td', 'result_text').a
        soup = BeautifulSoup(geturl(urljoin(self.imdb_url, hit['href']),
                                    referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def response(self, nick, args, kwargs): try: query = args[0] doc = geturl(self.search, opts={u'verbose': u'on', u'name': query}) drink = self.drink.search(doc).group(1) url = urljoin(self.baseurl, drink) doc = geturl(url) title = self.title.search(doc).group(1) ingredients = self.ingredients.findall(doc) instructions = self.instructions.search(doc).group(1) response = strip_html(u'%s - %s - %s' % (title, u', '.join(ingredients), instructions)) except Exception, error: response = u"That's a made-up drink, sorry."
def response(self, nick, args, kwargs):
    """Return an entry from the requested source.

    args[0] selects the source; args[1] (optional) is either an entry
    number or a search query.  With neither, a random entry is fetched.
    """
    source = self.sources[args[0]]
    # BUG FIX: narrowed two bare ``except:`` clauses that silently
    # swallowed everything (including KeyboardInterrupt/SystemExit)
    try:
        query = args[1]
    except IndexError:
        query = None
    try:
        num = int(query)
        query = None
    except (TypeError, ValueError):
        # query is None or not numeric: treat it as a search string
        num = None
    if num:
        url = source.bynum.replace(u'num', unicode(num))
        opts = None
    elif query:
        url = source.search
        opts = dict(source.opts)
        opts[source.query] = query
    else:
        url = source.random
        opts = None
    doc = geturl(url, opts=opts)
    entries = source.entries.findall(doc)
    if query:
        entries = filter(None, entries)
    entry = random.choice(entries)
    return '\n'.join(filter(None, strip_html(entry).strip().splitlines()))
def response(self, nick, args, kwargs):
    """Run a yelp search and render the top business result."""
    desc, loc = args
    # '@category' queries make no sense without an explicit location
    if desc.startswith('@') and not loc:
        raise Exception('invalid search')
    if not loc:
        # fall back to the caller's learned location, then the default
        if self.learn:
            loc = self.learn.lookup(u'location', nick)
        if not loc:
            loc = self.default_location
    opts = {'find_desc': desc, 'ns': 1, 'find_loc': loc, 'rpp': 1}
    page = geturl(SEARCHURL, opts)
    # pull the interesting fields out of the first (only) result
    soup = BeautifulSoup(page, convertEntities='html')
    hit = soup.body.find('div', 'businessresult clearfix')
    title_link = hit.find('a', id='bizTitleLink0')
    name = clean_re.search(u''.join(title_link.findAll(text=True))).group(1)
    cat = hit.find('div', 'itemcategories').a.renderContents()
    rating = hit.find('div', 'rating').img['alt'].replace(' star rating', '')
    reviews = hit.find('span', 'reviews').renderContents()
    parts = [piece.strip() for piece in hit.address.findAll(text=True)]
    address = u', '.join(piece for piece in parts if piece)
    url = urljoin(BASEURL, title_link['href'])
    return RESULT_FMT % {'nick': nick, 'name': name, 'cat': cat,
                         'rating': rating, 'reviews': reviews,
                         'address': address, 'url': url}
def translate(self, text, src, dst):
    """Perform the translation"""
    params = {'client': 't', 'text': text, 'hl': 'en',
              'sl': self.langs[src], 'tl': self.langs[dst],
              'ie': 'UTF-8', 'oe': 'UTF-8', 'multires': '0',
              'prev': 'btn', 'ssel': '4', 'tsel': '4', 'sc': '1'}
    url = 'http://translate.google.com/translate_a/t'
    raw = geturl(url, params)
    # the endpoint emits sparse JSON arrays (",,"); patch in empty strings
    while u',,' in raw:
        raw = raw.replace(u',,', u',"",')
    result = simplejson.loads(raw)
    try:
        detected = self.lookup[result[2]].capitalize()
    except StandardError:
        detected = None
    # the payload is nested lists; drill down to the first leaf
    while isinstance(result, list) and result:
        result = result[0]
    if src == 'auto' and detected:
        result = u'[detected %s] %s' % (detected, result)
    if result:
        return result
def response(self, nick, args, kwargs):
    """Scrape the latest FAIL headline and the FIX that follows it."""
    headline = BeautifulSoup(geturl(self.url)).h1
    fail_text = self.fail_re.search(headline.renderContents()).group(1)
    fix_text = self.fail_re.search(
        headline.findNext('h1').renderContents()).group(1)
    message = u'%s: %s: %s %s: %s' % (nick,
                                      self.col('red', text='FAIL'),
                                      fail_text,
                                      self.col('green', text='FIX'),
                                      fix_text)
    return self.spaces_re.sub(' ', strip_html(message))
def request(self, url, parameters):
    """Build, sign and fetch an OAuth GET request for url."""
    req = oauth.Request.from_consumer_and_token(
        self.consumer, token=self.token, http_method='GET',
        http_url=url, parameters=parameters)
    req.sign_request(self.method, self.consumer, self.token)
    # '+' must be percent-encoded or the signature won't verify
    signed_url = req.to_url().replace('+', '%20')
    return geturl(signed_url)
def time(self): try: doc = geturl(DoomsDay._url) time = self._re_time.search(doc).group(1) return time except Exception, error: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(error) return u'UNKNOWN'
def translate(self, text, src, dst):
    """Perform the translation"""
    opts = {'langpair': '%s|%s' % (self.langs[src], self.langs[dst]),
            'v': '1.0',
            'q': text}
    payload = simplejson.loads(geturl(self.url, opts))['responseData']
    result = strip_html(payload['translatedText'])
    # prefix the detected source language when the API reports one we know
    try:
        detected = self.lookup[payload['detectedSourceLanguage']].capitalize()
    except KeyError:
        pass
    else:
        result = u'[detected %s] %s' % (detected, result)
    return result
def level(self): try: doc = geturl(Terror._url) level = self._re_level.search(doc).group(1) color = self._color_map[level.lower()] return self.colorlib.get_color(color, text=level) except Exception, error: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(error) return u'UNKNOWN'
def forecast(self, location): '''get weather forecast''' try: page = geturl(url=self.forecast_url, opts={u'query':location}).encode('utf-8') xml = ElementTree.fromstring(page) text = strip_html(xml.find('.//fcttext').text) except Exception, e: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(e) return "error looking up forecast for location: %s" % location
def forecast(self, location): """get weather forecast""" try: page = geturl(url=self.forecast_url, opts={u"query": location}).encode("utf-8") xml = ElementTree.fromstring(page) text = strip_html(xml.find(".//fcttext").text) except Exception, e: self.log.warn(u"error in module %s" % self.__module__) self.log.exception(e) return "error looking up forecast for location: %s" % location
def response(self, nick, args, kwargs):
    """Scrape the latest FAIL headline and its follow-up FIX headline."""
    first_h1 = BeautifulSoup(geturl(self.url)).h1
    second_h1 = first_h1.findNext('h1')
    rendered = u'%s: %s: %s %s: %s' % (
        nick,
        self.col('red', text='FAIL'),
        self.fail_re.search(first_h1.renderContents()).group(1),
        self.col('green', text='FIX'),
        self.fail_re.search(second_h1.renderContents()).group(1))
    return self.spaces_re.sub(' ', strip_html(rendered))
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    search_opts = {'s': 'tt', 'q': name, 'exact': 'true'}
    page = geturl(self.imdb_search, search_opts, referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'Find - IMDb':
        # we got the search page, not the title page: take the top result
        results = soup.body.find('table', 'findList')
        first = results.tr.find('td', 'result_text').a
        title_url = urljoin(self.imdb_url, first['href'])
        soup = BeautifulSoup(geturl(title_url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def response(self, nick, args, kwargs):
    """Return one random confession scraped from the site."""
    soup = BeautifulSoup(geturl(self.random, add_headers={'Accept': '*/*'}))
    content = soup.find(u'div', attrs={u'id': u'main'})
    posts = content.findAll(u'div', attrs={u'class': u'content'})
    chosen = random.choice(posts)
    # flatten the post's paragraphs into one plain-text line
    joined = u' '.join(unicode(p) for p in chosen.findAll(u'p'))
    return strip_html(joined).strip()
def bodycount(self): try: doc = geturl(self._bodycount_url) data = self._bodycount_re.search(doc).group(1) data = decode(data, 'ascii') data = strip_html(data) data = self._re_whitespace.sub(u' ', data) data = data.strip() return data except Exception, error: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(error) return u'UNKNOWN'
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                  referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        # search page: scan the result tables for an exact (normalized) match
        wanted = self.normalize(name)
        url = None
        for para in soup.body.find('div', id='main')('p', style=None):
            for row in para.table('tr'):
                link = row('td')[2].a
                candidate = self.normalize(link.renderContents())
                self.log.debug('Comparing: %r == %r', candidate, wanted)
                if candidate == wanted:
                    url = urljoin(self.imdb_url, link['href'])
                    break
            if url:
                break
        else:
            raise ValueError('no exact matches')
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = strip_html(soup.find('span', 'rating-rating').renderContents())
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def response(self, nick, args, kwargs):
    """Return a random (or requested) piece of ASCII art.

    With no query a random page is fetched; otherwise the query is
    slugified into a page name.  Returns the art framed by its title.
    """
    query = args[0]
    if query is None or query == u'':
        url = self.random_url
    else:
        query = u' '.join(query.split())
        query = query.replace(u' ', u'_')
        # BUG FIX: urllib.quote() cannot handle non-ASCII unicode; encode
        # to utf-8 first (consistent with the sibling joke handlers)
        query = urllib.quote(query.encode('utf-8')) + u'.html'
        url = urljoin(self.baseurl, query)
    doc = geturl(url)
    results = self.artfart.findall(doc)
    title, art = random.choice(results)
    return u'>>> %s <<<\n%s' % (title, strip_html(art))
def get_soup(self, query):
    """Fetch the wiki page matching query (u'random' for a random page)
    and return (soup, title) with noisy markup pruned."""
    if isinstance(query, (list, tuple)):
        query = u' '.join(query)
    # choose the random-page URL or a search request
    if query == u'random':
        opts = {}
        url = urljoin(self.base_url, self.random_path)
    else:
        opts = {u'search': query, u'go': u'Go'}
        url = urljoin(self.base_url, self.search_path)
    page = geturl(url, referer=self.base_url, opts=opts,
                  size=self.sample_size)
    soup = BeautifulSoup(page)
    # page title minus the site advert suffix
    title = soup.title.string.replace(self.advert, u'')
    # drop tables (sidebars / tabular data)
    for node in soup.findAll(u'table'):
        node.extract()
    # drop disambiguation links
    for node in soup.findAll(u'div', attrs={u'class': u'dablink'}):
        node.extract()
    # drop latitude/longitude metadata for places
    for node in soup.findAll(u'span', attrs={u'id': u'coordinates'}):
        node.extract()
    # drop non-english content wrappers
    for node in soup.findAll(u'span', attrs={u'lang': True}):
        node.extract()
    # drop IPA pronunciation guides and audio links
    for node in soup.findAll(u'span', attrs={u'class': u'IPA'}):
        node.extract()
    for node in soup.findAll(u'a', text=u'IPA'):
        node.extract()
    for node in soup.findAll(u'span', attrs={u'class': Wiki._audio}):
        node.extract()
    return soup, title
def response(self, nick, args, kwargs):
    """Return a joke: random with no query, else the named joke page."""
    query = args[0]
    if query is None or query == u'':
        url = self.random_url
    else:
        # slugify the query into a page name
        slug = u'_'.join(query.split())
        slug = urllib.quote(encode(slug, 'utf-8')) + u'.php'
        url = urljoin(self.baseurl, slug)
    doc = geturl(url)
    text = strip_html(self.joke.findall(doc)[0])
    # cleanup output a bit.. some funny whitespace in it -cj
    text = text.replace(u'\x14', u' ').replace(u'\n', u' ')
    return re.sub(r'\s{2,}', u' ', text).strip()
def official_station(self, location):
    '''Get current conditions from an official station (typically an
    airport) matching location.

    Returns the rendered weather string, or an error message on any
    failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    '''
    try:
        page = geturl(url=self.station_url,
                      opts={u'query':location}).encode('utf-8')
        xml = ElementTree.fromstring(page)
        loc = xml.find('display_location/full').text
        time = xml.find('local_time').text
        conditions = xml.find('weather').text
        temp = float(xml.find('temp_f').text)
        tempstr = xml.find('temperature_string').text
        humidity = xml.find('relative_humidity').text
        wind = xml.find('wind_string').text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def response(self, nick, args, kwargs):
    """Return a joke: random with no query, else the named joke page."""
    query = args[0]
    if query is None or query == u'':
        url = self.random_url
    else:
        # normalize whitespace, slugify, and build the page URL
        slug = u'_'.join(query.split())
        slug = slug.encode('utf-8', 'replace')
        url = urljoin(self.baseurl, urllib.quote(slug) + u'.php')
    doc = geturl(url)
    text = strip_html(self.joke.findall(doc)[0])
    # cleanup output a bit.. some funny whitespace in it -cj
    text = text.replace(u'\x14', u' ')
    text = text.replace(u'\n', u' ')
    text = re.sub(r'\s{2,}', u' ', text)
    return text.strip()
def official_station(self, location):
    """Get current conditions from an official station (typically an
    airport) matching location.

    Returns the rendered weather string, or an error message on any
    failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    """
    try:
        page = geturl(url=self.station_url,
                      opts={u"query": location}).encode("utf-8")
        xml = ElementTree.fromstring(page)
        loc = xml.find("display_location/full").text
        time = xml.find("local_time").text
        conditions = xml.find("weather").text
        temp = float(xml.find("temp_f").text)
        tempstr = xml.find("temperature_string").text
        humidity = xml.find("relative_humidity").text
        wind = xml.find("wind_string").text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u"error in module %s" % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def response(self, nick, args, kwargs):
    """Scrape and render the projected 2010 Senate seat counts."""
    page = geturl(self._baseurl)
    try:
        scoreboard = self._score_re.search(page).group(1)
        dem = self._dem_re.search(scoreboard).groups()
        gop = self._gop_re.search(scoreboard).groups()
        # XXX diebold patch :D
        #dem, gop = (dem[0], gop[1]), (gop[0], dem[1])
        tie = None
        try:
            tie = self._tie_re.search(scoreboard).groups()
        except AttributeError:
            # no toss-up seats reported; that's fine
            pass
    except AttributeError:
        raise Exception(u"couldn't parse page")
    parts = [self.colorize(u'blue', *dem), self.colorize(u'red', *gop)]
    if tie:
        parts.append(self.colorize(u'white', *tie))
    return u'%s: Projected Senate Seats 2010: %s' % (nick, u', '.join(parts))
def get_quote(self, symbols):
    """Looks up the symbol from finance.yahoo.com, returns formatted result"""
    encoded = [quote(sym) for sym in symbols.split()]
    page = geturl(Yahoo._quote_url.replace(u'SYMBOL', "+".join(encoded)))
    results = []
    for line in page.splitlines():
        row = csv.reader([line]).next()
        symbol = row[0]
        name = row[1]
        trade_time, last_trade = strip_html(row[3]).split(" - ")
        last_trade = locale.atof(last_trade)
        # a non-numeric "last close" marks an exchange/index row
        try:
            last_close = locale.atof(row[2])
            exchange = False
        except ValueError:
            last_close = last_trade
            exchange = True
        if trade_time == "N/A":
            trade_time = u'market close'
        if exchange:
            results.append(u'%s (%s) - %s: %.4f' % (name, symbol,
                                                    trade_time, last_trade))
            continue
        delta = last_trade - last_close
        try:
            delta_perc = delta * 100.0 / last_close
        except ZeroDivisionError:
            delta_perc = 0.00
        if delta < 0:
            color = u'red'
        elif delta > 0:
            color = u'green'
        else:
            color = u'white'
        text = self.colorlib.get_color(
            color, text=u'%.2f (%+.2f %+.2f%%)' % (last_trade, delta,
                                                   delta_perc))
        results.append(u'%s (%s) - Open: %.2f | %s: %s' % (name, symbol,
                                                           last_close,
                                                           trade_time, text))
    return u'\n'.join(results)
def gettop(self):
    """Get box office ratings"""
    soup = BeautifulSoup(geturl(self.imdb_top))
    rows = soup.body.find('div', id='boxoffice').table('tr')[1:]
    movies = []
    for row in rows:
        cells = row('td')
        movies.append({'title': strip_html(cells[2].a.renderContents()),
                       'weekend': cells[3].renderContents().strip(),
                       'gross': cells[4].renderContents().strip()})
    # column widths for aligned output
    width_title = max(len(movie['title']) for movie in movies)
    width_weekend = max(len(movie['weekend']) for movie in movies)
    width_gross = max(len(movie['gross']) for movie in movies)
    lines = ['# / Title / Weekend / Gross']
    for rank, movie in enumerate(movies, 1):
        lines.append('%s %s - %s / %s' % (str(rank).rjust(2),
                                          movie['title'].ljust(width_title),
                                          movie['weekend'].ljust(width_weekend),
                                          movie['gross'].ljust(width_gross)))
    return '\n'.join(lines)
def gettop(self):
    """Get box office ratings"""
    soup = BeautifulSoup(geturl(self.imdb_top))
    table = soup.body.find('div', id='boxoffice').table
    # skip the header row, keep (title, weekend, gross) per movie
    data = [{'title': strip_html(row('td')[2].a.renderContents()),
             'weekend': row('td')[3].renderContents().strip(),
             'gross': row('td')[4].renderContents().strip()}
            for row in table('tr')[1:]]
    tsize = max(len(item['title']) for item in data)
    wsize = max(len(item['weekend']) for item in data)
    gsize = max(len(item['gross']) for item in data)
    output = ['# / Title / Weekend / Gross']
    for i, item in enumerate(data):
        output.append('%s %s - %s / %s' % (str(i + 1).rjust(2),
                                           item['title'].ljust(tsize),
                                           item['weekend'].ljust(wsize),
                                           item['gross'].ljust(gsize)))
    return '\n'.join(output)
def translate(self, text, src, dst):
    """Perform the translation"""
    # BUG FIX (behavior-preserving): the original dict literal repeated
    # the 'dt' key ten times; Python silently keeps only the last value
    # ('t'), so the other nine were dead code.  The API presumably wants
    # them as repeated query params -- TODO confirm whether geturl/opts
    # can send a key multiple times; until then only 'dt': 't' is sent,
    # exactly as before.
    opts = {'q': text, 'client': 't', 'sl': 'en', 'tl': 'es', 'hl': 'en',
            'dt': 't',
            'ie': 'UTF-8', 'oe': 'UTF-8', 'ssel': '3', 'tsel': '3',
            'kc': '0', 'tk': '286462.160648'}
    url = 'http://translate.google.com/translate_a/single'
    res = geturl(url, opts, referer='https://translate.google.com/')
    # the endpoint emits sparse JSON arrays (",,"); patch in empty strings
    while u',,' in res:
        res = res.replace(u',,', u',"",')
    res = simplejson.loads(res)
    try:
        det = self.lookup[res[2]].capitalize()
    except StandardError:
        det = None
    # the payload is nested lists; drill down to the first leaf
    while isinstance(res, list) and res:
        res = res[0]
    if src == 'auto' and det:
        res = u'[detected %s] %s' % (det, res)
    if res:
        return res
def _response(self, nick, args, kwargs):
    """Run a yelp search and render the top business result."""
    desc, loc = args
    # '@category' queries require an explicit location
    if desc.startswith('@') and not loc:
        raise Exception('invalid search')
    if not loc:
        loc = self.learn.lookup(u'location', nick) if self.learn else None
        if not loc:
            loc = self.default_location
    page = geturl(SEARCHURL, {'find_desc': desc, 'ns': 1,
                              'find_loc': loc, 'rpp': 1})
    soup = BeautifulSoup(page, convertEntities='html')
    business = soup.body.find('div', 'businessresult clearfix')
    link = business.find('a', id='bizTitleLink0')
    raw_name = u''.join(link.findAll(text=True))
    name = clean_re.search(raw_name).group(1)
    cat = business.find('div', 'itemcategories').a.renderContents()
    stars = business.find('div', 'rating').img['alt']
    rating = stars.replace(' star rating', '')
    reviews = business.find('span', 'reviews').renderContents()
    address_bits = [bit.strip() for bit in business.address.findAll(text=True)]
    address = u', '.join(bit for bit in address_bits if bit)
    url = urljoin(BASEURL, link['href'])
    return RESULT_FMT % {'nick': nick,
                         'name': name,
                         'cat': cat,
                         'rating': rating,
                         'reviews': reviews,
                         'address': address,
                         'url': url}
def _getpage(self, url, opts=None):
    """Fetch a wiki page and return (soup, title) with noisy markup removed."""
    page = geturl(url, referer=self.baseurl, opts=opts)
    # HTMLParser doesn't handle this very well.. see:
    # http://www.crummy.com/software/BeautifulSoup/3.1-problems.html
    soup = BeautifulSoup(self.scripts_re.sub('', page))
    title = soup.title.string
    if self.advert and self.advert in title:
        title = title.replace(self.advert, '')
    # drop tables (sidebars / tabular data)
    for node in soup.findAll('table'):
        node.extract()
    # drop disambiguation links
    for node in soup.findAll('div', 'dablink'):
        node.extract()
    # drop latitude/longitude metadata for places
    for node in soup.findAll('span', id='coordinates'):
        node.extract()
    # drop non-english content wrappers
    for node in soup.findAll('span', lang=True):
        node.extract()
    # drop IPA pronunciation guides and audio links
    for node in soup.findAll('span', 'IPA'):
        node.extract()
    for node in soup.findAll('a', text='IPA'):
        node.extract()
    for node in soup.findAll('span', 'audiolink'):
        node.extract()
    return soup, title
def response(self, nick, args, kwargs):
    """Fetch a random joke from the JSON API, swapping quotes for IRC."""
    payload = json.loads(geturl(self.baseurl))
    joke = payload['value']['joke']
    return joke.replace('"', "'")