def calculator(self, query):
    """Run *query* through the search endpoint and return the conversion text.

    The request uses a copy of ``self.calcopts`` with ``q`` set to *query*,
    so the shared option dict is never mutated.

    Raises:
        Exception: with message ``'no conversion detected'`` when the page
            does not match ``self.reConversionDetected`` or when no result
            can be extracted with ``self.reConversionResult``.
    """
    opts = dict(self.calcopts)
    opts['q'] = query
    doc = self.ua.openurl(self.search, opts=opts)
    if not self.reConversionDetected.search(doc):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception('no conversion detected')
    match = self.reConversionResult.search(doc)
    if match is None:
        # Detected marker present but result pattern missing: report the
        # same failure instead of crashing on ``None.group``.
        raise Exception('no conversion detected')
    return stripHTML(match.group(1))
def spellcheck(self, query): opts = dict(self.spellcheck_opts) opts['q'] = query result = self.ua.openurl(self.search, opts=opts, referer=self.baseurl) try: result = self.correct.search(result).group(1) result = stripHTML(result) except: result = query return result
def clock(self, query):
    """Use google to look up time in a given location

    Fetches ``self.search`` with the query ``'time in <query>'``, locates
    the ``<img>`` whose ``src`` matches ``self.clock_re`` and reads the
    next ``<td>`` after it.

    Returns the stripped text of that cell, or ``None`` when the lookup
    or the parsing fails.
    """
    try:
        doc = self.ua.open(self.search, {'q': 'time in %s' % query})
        soup = BeautifulSoup(doc)
        cell = soup.find('img', src=self.clock_re).findNext('td')
        try:
            # Drop a nested table from the cell if there is one.
            cell.find('table').extract()
        except AttributeError:
            pass
        return stripHTML(cell.renderContents().decode('utf-8')).strip()
    except Exception:
        # Deliberate best-effort: any ordinary failure means "no answer".
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
def get_summary(self, query):
    """Return a one-line plain-text summary of the page found for *query*.

    The page is obtained from ``self.get_soup(query)`` as ``(soup, title)``.
    Three outcomes are possible:

    * disambiguation page (a ``div`` with id ``disambig``): the title
      followed by as many link titles from the first ``<ul>`` as fit in
      ``self.summary_size``; a fixed fallback message if that fails;
    * ``title == self.error``: a "No results found" message;
    * otherwise: the title followed by as many whole sentences of the
      cleaned paragraph text as fit in ``self.summary_size``.
    """
    soup, title = self.get_soup(query)

    # check if this is a disambiguation page, if so construct special page
    # there isn't a consistent style guide, so we just try to do the
    # most common format (links inside the first <ul>). if this fails,
    # return a friendly failure for now
    if soup.find('div', attrs={'id': 'disambig'}):
        try:
            summary = '%s (Disambiguation) - ' % title
            for link in soup.find('ul').findAll('a'):
                # NOTE(review): on Python 2 str() of a non-ASCII unicode
                # title raises and lands in the fallback below -- confirm
                # whether that is intended.
                link_title = str(link['title']).strip()
                # +2 leaves room for the ', ' separator
                if len(summary) + len(link_title) + 2 > self.summary_size:
                    break
                if not summary.endswith(' '):
                    summary += ', '
                summary += link_title
        except Exception:
            # best-effort only; do not swallow KeyboardInterrupt/SystemExit
            summary = 'Fancy, unsupported disambiguation page!'
        return summary

    # search error: nothing to summarise, so skip the clean-up work below
    if title == self.error:
        return 'No results found for "%s"' % query

    # massage into plain text by concatenating paragraphs
    content = ' '.join(str(para) for para in soup.findAll('p'))

    # clean up rendered text
    content = stripHTML(content)                     # strip markup
    content = Wiki._citations.sub('', content)       # remove citations
    content = Wiki._parens.sub('', content)          # remove parentheticals
    content = Wiki._whitespace.sub(' ', content)     # compress whitespace
    content = Wiki._fix_punc.sub(r'\1', content)     # fix punctuation
    content = content.strip()                        # strip whitespace

    # generate summary by adding as many sentences as possible before limit
    summary = '%s -' % title
    for sentence in Wiki._sentence.findall(content):
        # +1 accounts for the joining space
        if len(summary) + 1 + len(sentence) > self.summary_size:
            break
        summary += ' %s' % sentence
    return summary
def calculator(self, query):
    """Try to use google calculator for given query

    The request uses a copy of ``self.calcopts`` with ``q`` set to *query*.
    Pieces of the extracted result that match ``self.sup_re`` are converted
    with ``superscript`` before the text is passed through ``stripHTML``.

    Raises:
        Exception: with message ``u'no conversion detected'`` when the page
            does not match ``self.reConversionDetected`` or when no result
            can be extracted with ``self.reConversionResult``.
    """
    opts = dict(self.calcopts)
    opts[u'q'] = query
    doc = self.ua.open(self.search, opts=opts)
    if not self.reConversionDetected.search(doc):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception(u'no conversion detected')
    match = self.reConversionResult.search(doc)
    if match is None:
        # Same failure as above rather than an AttributeError on None.
        raise Exception(u'no conversion detected')

    # turn super scripts into utf8
    # NOTE(review): relies on sup_re.split() returning the matched pieces
    # too, i.e. on the pattern having a capturing group -- confirm.
    parts = [
        superscript(part) if self.sup_re.match(part) else part
        for part in self.sup_re.split(match.group(1))
    ]
    return stripHTML(u''.join(parts))
def extractTextFrom(self, soup):
    """Collect the text nodes of *soup* that ``utils.isVisible`` accepts.

    The accepted nodes are joined with single spaces, encoded as UTF-8 and
    handed to ``utils.stripHTML``, whose result is returned.
    """
    text_nodes = soup.findAll(text=True)
    visible_nodes = [node for node in text_nodes if utils.isVisible(node)]
    joined = " ".join(visible_nodes)
    return utils.stripHTML(joined.encode('utf-8'))