Example #1
File: woot.py  Project: gipi/Richie
    def response(self, nick, args, kwargs):
        try:
            feed = rssparser.parse(self.rssurl)

            # get latest entry and their homepage url
            title = feed['items'][0]['title'].split()
            offer = ' '.join(title[:-2])
            
            try:
                # title[-1] is a string token; %.2f needs a number
                price = "$%.2f" % float(title[-1])
            except:
                price = ''

            longdescription = feed['items'][0]['description']
            page = feed['items'][0]['link']

            # strip out html
            longdescription = stripHTML(longdescription).strip()

            # these can get absurdly long
            if len(longdescription) > self.max:
                longdescription = longdescription[:self.max-4] + ' ...'

            return '%s: %s\n[%s]\n%s' % (offer, price, page, longdescription)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: Couldn't load the page woot returned D:" % nick
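All of these snippets lean on the project's stripHTML helper, whose implementation is not shown in this listing. For orientation, a minimal Python 2 sketch of such a tag-stripping helper (an assumption for illustration, not gipi/Richie's actual code) could look like this:

from HTMLParser import HTMLParser  # Python 2 stdlib

class _TextExtractor(HTMLParser):
    """Collect text nodes and drop the tags around them."""
    def __init__(self):
        HTMLParser.__init__(self)
        self.chunks = []

    def handle_data(self, data):
        self.chunks.append(data)

def stripHTML(data):
    """Hypothetical sketch: flatten an HTML fragment to plain text."""
    parser = _TextExtractor()
    parser.feed(data)
    parser.close()
    return ''.join(parser.chunks)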
Example #2
File: chp.py  Project: gipi/Richie
    def response(self, nick, args, kwargs):
        query = args[0]
        try:
            check = self.clean.sub('', query)
            check = re.compile(check, re.I)

            results = []
            doc = geturl(self.url)
            for i in self.incidents.findall(doc):
                data = [stripHTML(c) for c in self.data.findall(i)][1:]
                if len(data) != 4:
                    continue
                if check.search(data[2]):
                    results.append('=> %s: %s - %s - %s' %
                                   (data[0], data[1], data[2], data[3]))

            if len(results) > 0:
                return '\n'.join(results)
            else:
                return '%s: No incidents found' % nick

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return '%s: I failed to perform that lookup' % nick
Example #3
    def parse_email(self, payload):
        try:
            message = email.message_from_string(payload)
            for part in message.walk():
                if part.get_content_maintype() == 'multipart':
                    continue
                mime_type = part.get_content_type()
                body = part.get_payload(decode=True)
                if mime_type == 'text/plain':
                    break
                elif mime_type == 'text/html':
                    body = stripHTML(body)
                    break

            for spam in self._spams:
                if spam in body:
                    body = body.replace(spam, '')

            body = body.strip()
            cleaned = []
            for line in body.splitlines():
                line = line.strip()
                if not len(line) or line.startswith('>'):
                    continue
                elif self._quoted.search(line) or line == self._sig:
                    break
                else:
                    cleaned.append(line)
            body = ' '.join(cleaned)
            return body  # the cleaned plain-text body
        except Exception, e:
            raise ParsingError, "couldn't parse payload: %s" % e
Example #4
    def response(self, nick, args, kwargs):
        try:
            try:
                user = args[0]
            except:
                user = None

            if user is None or user == '':
                doc = geturl(self.randomURL)
                user = re.search('"currentJournal": "(.*?)"', doc).group(1)

            url = urljoin(self.baseURL, '/users/%s/data/rss' % user)
            feed = rssparser.parse(url)

            # get latest entry and their homepage url
            entry = feed['items'][0]['description']
            page = feed['channel']['link']

            # strip out html
            entry = stripHTML(entry)

            # detect unusual amounts of high ascii, probably russian journal
            if isUTF8(entry):
                return '%s: Russian LJ :(' % nick

            # these can get absurdly long
            entry = entry[:self.max]

            return '%s: [%s] %s' % (nick, page, entry)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: Couldn't load the page LJ returned D:" % nick
Example #5
    def response(self, nick, args, kwargs):
        word = args[0].lower()
        try:
            try:
                num = int(args[1])
            except:
                num = 1

            url = urljoin(self.base_url, word)
            doc = geturl(url)
            defs = self.re_defs.search(doc).group(1)
            defs = self.re_newline.sub('', defs)
            defs = self.re_def_break.split(defs)
            if len(defs) > 1:
                defs.pop(0)
            if num > len(defs):
                num = 1
            definition = defs[num - 1]
            definition = stripHTML(definition)
            definition = self.header.sub('', definition)

            return '%s: [%s/%s] %s' % (nick, num, len(defs), definition)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: I couldn't look that up for some reason.  D:" % nick
Example #6
def normalize(name):
    name = stripHTML(name)
    name = badchars.sub('', name)
    name = name.lower()
    name = name.strip()
    name = whitespace.sub(' ', name)
    return name
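normalize depends on module-level badchars and whitespace patterns that this excerpt does not show. Assuming definitions along the following lines (an assumption, not the project's actual patterns), a quick usage sketch with stripHTML and normalize in scope:

import re

# assumed patterns; the real module's definitions may differ
badchars = re.compile(r'[^a-z0-9 ]', re.I)
whitespace = re.compile(r'\s+')

print normalize('  The <b>Matrix</b> ')  # -> 'the matrix'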
Example #7
 def rate_imdb(self, name):
     """Get user rating from IMDB"""
     page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                   referer=self.imdb_url)
     soup = BeautifulSoup(page)
     if soup.title.renderContents() == 'IMDb Title Search':
         main = soup.body.find('div', id='main')
         name = self.normalize(name)
         url = None
         for p in main('p', style=None):
             for row in p.table('tr'):
                 link = row('td')[2].a
                 if self.normalize(link.renderContents()) == name:
                     url = urljoin(self.imdb_url, link['href'])
                     break
             if url:
                 break
         else:
             raise ValueError('no exact matches')
         soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
     try:
         rating = soup.body.find('div', 'meta').b.renderContents()
     except AttributeError:
         rating = 'Unrated'
     return stripHTML(soup.title.renderContents()), rating
Example #8
 def response(self, nick, args, kwargs):
     try:
         source = self.sources[args[0]]
         try:
             query = args[1]
         except:
             query = None
         try:
             num = int(query)
             query = None
         except:
             num = None
         if num:
             url = source.bynum.replace(u'num', unicode(num))
             opts = None
         elif query:
             url = source.search
             opts = dict(source.opts)
             opts[source.query] = query
         else:
             url = source.random
             opts = None
         doc = geturl(url, opts=opts)
         entries = source.entries.findall(doc)
         if query:
             entries = filter(None, entries)
         entry = random.choice(entries)
         entry = stripHTML(entry).strip()
         return entry
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: %s' % (nick, self._error)
Example #9
 def response(self, nick, args, kwargs):
     try:
         item = feedparser.parse(self.url).entries[0]
         body = stripHTML(item.description).strip()
         return u' | '.join([item.link, body, item.updated])
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: %s' % (nick, error)
Example #10
 def lookup(self, term, idx=1):
     """Lookup term in dictionary"""
     url = urljoin(self.define_url, quote(term.lower()))
     soup = getsoup(url, referer=self.base_url)
     for br in soup('br'):
         br.extract()
     val = stripHTML(soup.renderContents().decode('utf-8'))
     val = val.replace(u'\xa0', ' ').replace('\n', ' ')
     return self.whitespace_re.sub(' ', val).strip()
Example #11
File: movie.py  Project: gipi/Richie
def normalize(name):
    """Normalize a movie title for easy comparison"""
    name = stripHTML(name)
    name = year.sub('', name)
    name = badchars.sub(' ', name)
    name = name.lower()
    name = name.strip()
    name = whitespace.sub(' ', name)
    return name
Example #12
class Main(Base):
    def __init__(self, madcow):
        self.madcow = madcow
        self.enabled = madcow.config.twitter.enabled
        self.frequency = madcow.config.twitter.updatefreq
        self.output = madcow.config.twitter.channel
        self.api = twitter.Api()
        self.api.SetCache(None)  # this fills up /tmp :(
        self.api.SetUserAgent(
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)')
        self.api.SetCredentials(self.madcow.config.twitter.username,
                                self.madcow.config.twitter.password)
        self.__updatelast()

    def __updatelast(self):
        """Updates timestamp of last update."""
        self.lastupdate = time.gmtime()

    def __get_update_str(self):
        return time.strftime("%a, %d %b %Y %X GMT", self.lastupdate)

    def response(self, *args):
        """This is called by madcow, should return a string or None"""
        try:
            log.debug('getting tweets...')
            tweets = self.api.GetFriendsTimeline(since=self.__get_update_str())
        except Exception, e:
            try:
                if e.code == 304:
                    log.debug('no new tweets')
                    return
            except:
                pass
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return

        log.debug('found %s tweets, parsing' % len(tweets))
        lines = []

        for t in reversed(tweets):
            # twitter fails sometimes, so we do our own filter; compare in UTC
            # since lastupdate comes from time.gmtime()
            if time.gmtime(t.GetCreatedAtInSeconds()) < self.lastupdate:
                print "ignoring old tweet with timestamp %s (TWITTER SUCKS)" % t.created_at
                continue

            line = ">> tweet from %s: %s <<" % (t.user.screen_name,
                                                stripHTML(t.text))
            lines.append(line)

        self.__updatelast()

        if lines:
            return "\n".join(lines)
        else:
            return None
Example #13
File: aim.py  Project: gipi/Richie
 def receiveMessage(self, user, multiparts, flags):
     req = Request(message=stripHTML(multiparts[0][0]))
     req.nick = user.name
     req.channel = 'AIM'
     req.private = True
     req.addressed = True
     req.aim = self
     log.info('[AIM] <%s> %s' % (req.nick, req.message))
     self.bot.checkAddressing(req)
     self.bot.process_message(req)
Example #14
 def extract_quote(self, obj):
     li = obj.find(u'li')
     contents = li.contents
     contents = [unicode(part) for part in contents]
     quote = u' '.join(contents)
     quote = stripHTML(quote)
     quote = _linebreak.sub(u' ', quote)
     quote = _whitespace.sub(u' ', quote)
     quote = quote.strip()
     return quote
Example #15
    def response(self, nick, args, kwargs):
        query = args[0]

        try:
            if not query or query == 'headline':
                url = self._world_url
            else:
                url = self._search_url + urllib.quote(query)

            feed = rssparser.parse(url)
            item = feed['items'][0]
            url = item['link']
            title = stripHTML(item['title'])
            summary = stripHTML(item['description'])  # don't shadow builtin sum()
            return '\n'.join((url, title, summary))

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return '%s: %s' % (nick, self._error)
Example #16
 def normalize(self, name):
     """Normalize a movie title for easy comparison"""
     name = stripHTML(name)
     name = self.year_re.sub('', name)              # strip trailing year
     name = self.rev_article_re.sub(r'\2 \1', name) # Movie, The = The Movie
     name = self.articles_re.sub('', name)          # strip leading the/an
     name = self.badchars_re.sub(' ', name)         # only allow alnum
     name = name.lower()                            # lowercase only
     name = name.strip()                            # strip whitespace
     name = self.whitespace_re.sub(' ', name)       # compress whitespace
     return name
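The reversed-article substitution ("Movie, The" -> "The Movie") implies a pattern with two capture groups. The compiled regexes are not shown in this excerpt; one plausible shape (an assumption for illustration):

import re

# hypothetical pattern: a trailing ", the"/", a"/", an" is moved to the front
rev_article_re = re.compile(r'^(.+), (the|an?)$', re.I)

print rev_article_re.sub(r'\2 \1', 'matrix, the')  # -> 'the matrix'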
Example #17
 def post(self, url, tags):
     try:
         html = self.ua.open(url, size=2048)
         title = stripHTML(self.title.search(html).group(1))
     except AttributeError:
         title = url
     opts = {u'url': url,
             u'description': title,
             u'tags': u' '.join(tags),
             u'replace': u'no',
             u'shared': u'yes'}
     self.ua.open(self.posturl, opts=opts)
Example #18
 def response(self, nick, args, kwargs):
     try:
         soup = getsoup(self.spec_url % int(args[0]) if args[0] else self.rand_url)
         soup.find('div', id='submit').extract()
         post = soup.body.find('div', 'post')
         return u'%s: (%d) %s' % (nick, int(post.find('a', 'fmllink')['href'].split('/')[-1]),
                                  stripHTML(' '.join(link.renderContents()
                                                     for link in post('a', 'fmllink')).decode('utf-8', 'ignore')))
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: Today I couldn\'t seem to access fmylife.com.. FML' % nick
Example #19
def get_text():
    page = geturl(url)
    soup = BeautifulSoup(page)
    texts = soup.body.findAll('div', 'post_content')
    text = random.choice(texts)
    text = text.renderContents()
    text = stripHTML(text)
    text = text.splitlines()
    text = [line.strip() for line in text]
    text = [line for line in text if line]
    text = u'\n'.join(text)
    return text
Example #20
 def response(self, nick, args, kwargs):
     try:
         opts = {'hl': 'en', 'aq': 'f', 'safe': 'off', 'q': args[0]}
         soup = getsoup(self.google_search, opts, referer=self.google_url)
         a = soup.body.find('a', 'spell')
         if a:
             res = stripHTML(a.renderContents().decode('utf-8', 'ignore'))
         else:
             res = u'spelled correctly'
     except Exception, error:
         log.warn('error in module %s' % self.__module__)
         log.exception(error)
         res = u'I had trouble with that'
     return u'%s: %s' % (nick, res)
Example #21
 def post(self, url, tags):
     try:
         html = self.ua.openurl(url, size=2048)
         title = stripHTML(self.title.search(html).group(1))
     except:
         title = url
     opts = {
         'url': url,
         'description': title,
         'tags': ' '.join(tags),
         'replace': 'no',
         'shared': 'yes',
     }
     self.ua.openurl(self.posturl, opts=opts)
Example #22
    def bodycount(self):

        try:
            doc = geturl(self._bodycount_url)
            data = self._bodycount_re.search(doc).group(1)
            data = data.decode('ascii', 'replace')
            data = stripHTML(data)
            data = self._re_whitespace.sub(u' ', data)
            data = data.strip()
            return data
        except Exception, error:
            log.warn(u'error in module %s' % self.__module__)
            log.exception(error)
            return u'UNKNOWN'
Example #23
File: war.py  Project: gipi/Richie
 def bodycount(self):
     try:
         doc = geturl(IraqWar._bodycount_url)
         soup = BeautifulSoup(doc)
         data = soup.find('td', attrs={'class': 'main-num'})
         data = data.find('a')
         data = str(data.contents[0])
         data = stripHTML(data)
         data = IraqWar._re_whitespace.sub(' ', data)
         data = data.strip()
         return data
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return 'UNKNOWN'
Example #24
File: hugs.py  Project: gipi/Richie
 def response(self, nick, args, kwargs):
     try:
         doc = geturl(self.random)
         soup = BeautifulSoup(doc)
         confs = soup.findAll('div', attrs={'class': 'content'})[3:]
         conf = random.choice(confs)
         conf = [str(p) for p in conf.findAll('p')]
         conf = ' '.join(conf)
         conf = stripHTML(conf)
         conf = conf.strip()
         return conf
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return '%s: I had some issues with that..' % nick
Example #25
File: bible.py  Project: gipi/Richie
    def response(self, nick, args, kwargs):
        query = args[0]

        try:
            doc = geturl(self.passage, opts={'search': query, 'version': 31})
            response = self.verse.search(doc).group(1)
            response = self.footnotes.sub('', response)
            response = self.junk_html.sub('', response)
            response = stripHTML(response)
            response = response.strip()
            return response[:self.max]
        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: God didn't like that." % nick
Example #26
 def response(self, nick, args, kwargs):
     try:
         doc = geturl(self.random, add_headers={'Accept': '*/*'})
         soup = BeautifulSoup(doc)
         main = soup.find(u'div', attrs={u'id': u'main'})
         confs = main.findAll(u'div', attrs={u'class': u'content'})
         conf = random.choice(confs)
         conf = [unicode(p) for p in conf.findAll(u'p')]
         conf = u' '.join(conf)
         conf = stripHTML(conf)
         conf = conf.strip()
         return conf
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: I had some issues with that..' % nick
Example #27
    def get_summary(self, query):
        soup, title = self.get_soup(query)

        # check if this is a disambiguation page, if so construct special page
        # there isn't a consistent style guide, so we just try to do the
        # most common format (ordered list of links). if this fails, return
        # a friendly failure for now
        if soup.find(u'div', attrs={u'id': u'disambig'}):
            try:
                summary = u'%s (Disambiguation) - ' % title
                for link in soup.find(u'ul').findAll(u'a'):
                    title = unicode(link[u'title']).strip()
                    if len(summary) + len(title) + 2 > self.summary_size:
                        break
                    if not summary.endswith(u' '):
                        summary += u', '
                    summary += title
            except:
                summary = u'Fancy, unsupported disambiguation page!'
            return summary

        # massage into plain text by concatenating paragraphs
        content = []
        for para in soup.findAll(u'p'):
            content.append(unicode(para))
        content = u' '.join(content)

        # clean up rendered text
        content = stripHTML(content)                 # strip markup
        content = Wiki._citations.sub(u'', content)   # remove citations
        content = Wiki._parens.sub(u'', content)      # remove parentheticals
        content = Wiki._whitespace.sub(u' ', content) # compress whitespace
        content = Wiki._fix_punc.sub(r'\1', content) # fix punctuation
        content = content.strip()                    # strip whitespace

        # search error
        if title == self.error:
            return u'No results found for "%s"' % query

        # generate summary by adding as many sentences as possible before limit
        summary = u'%s -' % title
        for sentence in Wiki._sentence.findall(content):
            if len(summary) + 1 + len(sentence) > self.summary_size:
                break
            summary += u' %s' % sentence
        return summary
Example #28
 def response(self, nick, args, kwargs):
     query = args[0]
     try:
         doc = geturl(self.search, opts={u'verbose': u'on', u'name': query})
         drink = self.drink.search(doc).group(1)
         url = urljoin(self.baseurl, drink)
         doc = geturl(url)
         title = self.title.search(doc).group(1)
         ingredients = self.ingredients.findall(doc)
         instructions = self.instructions.search(doc).group(1)
         response = u'%s: %s - %s - %s' % (
                 nick, title, u', '.join(ingredients), instructions)
         response = stripHTML(response)
         return response
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u"%s: Something ungood happened looking that up, sry" % nick
Example #29
File: movie.py  Project: gipi/Richie
    def rate(self, movie):
        try:
            opts = dict(self.movie_opts)
            opts['ts'] = movie
            page = geturl(self.search, opts=opts)
            movie = normalize(movie)
            movies = self.result.findall(page)
            movies = [(path, normalize(title)) for path, title in movies]
            url = None
            for path, title in movies:
                if title == movie:
                    url = urljoin(self.baseurl, path)
                    break
            if not url:
                url = urljoin(self.baseurl, movies[0][0])
            page = geturl(url, referer=self.search)
            try:
                critic_rating = self.critic_rating.search(page).group(1)
                critic_rating = 'Critics: ' + critic_rating + '/100'
            except:
                critic_rating = None
            try:
                user_rating = self.user_rating.search(page).group(1)
                user_rating = 'Users: ' + user_rating + '/10'
            except:
                user_rating = None

            title = html_title.search(page).group(1)
            title = title.replace(': Reviews', '')

            response = 'Meta'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            ratings = [
                i for i in (critic_rating, user_rating) if i is not None
            ]
            ratings = ', '.join(ratings)
            if ratings:
                response += ' - %s' % ratings
            return response
        except:
            return
Example #30
 def response(self, nick, args, kwargs):
     query = args[0]
     if query is None or query == '':
         url = self.random_url
     else:
         query = ' '.join(query.split())
         query = query.replace(' ', '_')
         query = urllib.quote(query) + '.html'
         url = urljoin(self.baseurl, query)
     try:
         doc = geturl(url)
         results = self.artfart.findall(doc)
         result = random.choice(results)
         title, art = result
         art = stripHTML(art)
         return '>>> %s <<<\n%s' % (title, art)
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return "%s: I had a problem with that, sorry." % nick
Example #31
 def response(self, nick, args, kwargs):
     try:
         fail = BeautifulSoup(geturl(self.url)).h1
         return self.spaces_re.sub(
             " ",
             stripHTML(
                 u"%s: %s: %s %s: %s"
                 % (
                     nick,
                     self.col("red", text="FAIL"),
                     self.fail_re.search(fail.renderContents()).group(1),
                     self.col("green", text="FIX"),
                     self.fail_re.search(fail.findNext("h1").renderContents()).group(1),
                 )
             ),
         )
     except Exception, error:
         log.warn("error in module %s" % self.__module__)
         log.exception(error)
         return u"%s: Too much fail for technobabble" % nick
Example #32
 def rate_rt(self, name):
     """Rating from rotten tomatoes"""
     page = geturl(self.rt_search, {'search': name}, referer=self.rt_url)
     soup = BeautifulSoup(page)
     for table in soup.body('table'):
         if table.caption.renderContents() == 'Movies':
             break
     else:
         raise ValueError('no movies found in search results')
     name = self.normalize(name)
     for row in table.tbody('tr'):
         link = row.a
         if self.normalize(link.renderContents()) == name:
             url = urljoin(self.rt_url, link['href'])
             break
     else:
         raise ValueError('no exact matches')
     soup = BeautifulSoup(geturl(url, referer=self.rt_search))
     info = soup.body.find('div', 'movie_info_area')
     return stripHTML(info.h1.renderContents()), info.a['title']
Example #33
File: bash.py  Project: gipi/Richie
    def response(self, nick, args, kwargs):
        try:
            source = self.sources[args[0]]

            try:
                query = args[1]
            except:
                query = None

            try:
                num = int(query)
                query = None
            except:
                num = None

            if num:
                url = source.bynum.replace('num', str(num))
            elif query:
                url = source.search.replace('query', query)
            else:
                url = source.random

            doc = geturl(url)
            entries = source.entries.findall(doc)

            if query:
                entries = [entry for entry in entries if query in entry]

            if len(entries) > 1:
                entry = random.choice(entries)
            else:
                entry = entries[0]

            return stripHTML(entry)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return '%s: %s' % (nick, self._error)
Example #34
File: movie.py  Project: gipi/Richie
    def rate(self, movie):
        """Get the freshness rating of a movie"""
        try:
            opts = {'sitesearch': 'rt', 'search': movie}
            page = geturl(self.search, opts=opts, referer=self.baseurl)
            movie = normalize(movie)
            title = html_title.search(page).group(1)
            if title == self.search_title:
                # normalize search results
                movies = self.movies.findall(page)
                movies = [(path, normalize(title)) for path, title in movies]

                # look for exact match
                url = None
                for path, title in movies:
                    if title == movie:
                        url = urljoin(self.baseurl, path)
                        break

                # no exact match, take first one
                if not url:
                    url = urljoin(self.baseurl, movies[0][0])

                # load page
                page = geturl(url, referer=self.search)

            # find rating
            title = self.movie_title.search(page).group(1)
            rating = self.rating.search(page).group(1)

            # construct response
            response = 'Freshness'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            response += ': %s' % rating
            return response

        except:
            return
Example #35
File: movie.py  Project: gipi/Richie
    def rate(self, movie):
        """Get the rating for a movie"""
        try:
            page = geturl(self.search, opts={'s': 'all', 'q': movie})
            movie = normalize(movie)
            title = html_title.search(page).group(1)
            if title == self.search_title:
                # normalize search results
                movies = self.movies.findall(page)
                movies = [(y, z) for x, y, z in movies]
                movies = [(path, normalize(title)) for path, title in movies]
                movies = [(path, title) for path, title in movies if title]

                # see if we can find an exact match
                url = None
                for path, title in movies:
                    if title == movie:
                        url = urljoin(self.baseurl, path)
                        break

                # no exact match, take first option returned
                if not url:
                    url = urljoin(self.baseurl, movies[0][0])

                # load actual page & title
                page = geturl(url, referer=self.search)
                title = html_title.search(page).group(1)

            # get rating and generate response
            rating = self.rating.search(page).group(1)
            response = 'IMDB'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            response += ': %s/10' % rating
            return response

        except:
            return
Example #36
    def get_quote(self, symbol):
        url = Yahoo._quote_url.replace('SYMBOL', symbol)
        page = geturl(url)
        soup = BeautifulSoup(page)
        company = ' '.join([str(item) for item in soup.find('h1').contents])
        company = stripHTML(company)
        tables = soup.findAll('table')
        table = tables[0]
        rows = table.findAll('tr')
        data = {}
        current_value = 0.0
        open_value = 0.0
        for row in rows:
            key, val = row.findAll('td')
            key = str(key.contents[0])
            if key == 'Change:':
                try:
                    img = val.find('img')
                    alt = str(img['alt'])
                    val = alt + stripHTML(str(val.contents[0]))
                except:
                    val = '0.00%'
            elif key == 'Ask:':
                continue
            else:
                val = stripHTML(str(val.contents[0]))

            val = val.replace(',', '')
            if Yahoo._isfloat.search(val):
                val = float(val)

            data[key] = val

            if key == 'Last Trade:' or key == 'Index Value:':
                current_value = val

            elif key == 'Prev Close:':
                open_value = val

        # see if we can calculate percentage
        try:
            change = 100 * (current_value - open_value) / open_value
            data['Change:'] += ' (%.2f%%)' % change
        except:
            pass

        # try and colorize the change field
        try:
            if 'Up' in data['Change:']:
                data['Change:'] = self._green + data['Change:'] + self._reset
            elif 'Down' in data['Change:']:
                data['Change:'] = self._red + data['Change:'] + self._reset
        except:
            pass

        # build friendly output
        output = []
        for key, val in data.items():
            if isinstance(val, float):
                val = '%.2f' % val
            output.append('%s %s' % (key, val))

        return '%s - ' % company + ' | '.join(output)
Example #37
    def forecast(self, location):
        page = geturl(url=self.search, opts={'query': location},
                referer=self.baseurl)
        soup = BeautifulSoup(page)

        # disambiguation page
        if 'Search Results' in str(soup):
            table = soup.find('table', attrs={'class': 'boxB full'})
            rows = table.findAll('tr')
            results = []
            match = None
            for row in rows:
                cells = row.findAll('td', attrs={'class': 'sortC'})
                for cell in cells:
                    link = cell.find('a')
                    if link is None or 'addfav' in str(link['href']):
                        continue
                    city = str(link.contents[0])
                    href = urljoin(self.baseurl, str(link['href']))
                    results.append(city)
                    if city.lower() == location.lower():
                        match = urljoin(self.baseurl, href)
                        break
                if match:
                    break
            if match:
                page = geturl(url=match)
                soup = BeautifulSoup(page)
            else:
                return 'Multiple results found: %s' % ', '.join(results)

        rss_url = soup.find('link', attrs=self._rss_link)['href']
        rss = rssparser.parse(rss_url)
        title = str(soup.find('h1').string).strip()
        conditions = stripHTML(rss['items'][0]['description'])
        fields = self._bar.split(conditions)
        data = {}
        for field in fields:
            try:
                key, val = self._keyval.search(field).groups()
                data[key] = val
            except:
                pass

        try:
            temp = float(self._tempF.search(data['Temperature']).group(1))
            blink = False
            if temp < 0:
                color = 6
            elif temp >= 0 and temp < 40:
                color = 2
            elif temp >= 40 and temp < 60:
                color = 10
            elif temp >= 60 and temp < 80:
                color = 3
            elif temp >= 80 and temp < 90:
                color = 7
            elif temp >= 90 and temp < 100:
                color = 5
            elif temp >= 100:
                color = 5
                blink = True
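            # \x03<n> sets mIRC color n, \x16 toggles reverse video, \x0F resets
            # formatting; the \x1b[5m / \x1b[0m pair below is ANSI blink on/off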
            data['Temperature'] = '\x03%s\x16\x16%s\x0F' % (color,
                    data['Temperature'])
            if blink:
                data['Temperature'] = '\x1b[5m' + data['Temperature'] + \
                        '\x1b[0m'

        except:
            pass

        output = []
        for key, val in data.items():
            line = '%s: %s' % (key, val)
            output.append(line)

        output = ' | '.join(output)

        return '%s: %s' % (title, output)