def personal_station(self, location):
    """Get current conditions from a personal weather station (PWS).

    location may be a PWS id (e.g. "KCASANFR1"-style) or a free-form
    query that is resolved to the nearest station.  Returns the rendered
    weather string, or an error message on any failure.

    NOTE(review): the local variable names below (loc, time, conditions,
    temp, tempstr, humidity, wind) are the de-facto interface -- locals()
    is passed to _format_weather(), which presumably reads these keys.
    Do not rename them without checking _format_weather.
    """
    try:
        if re.match("[A-Z]{8}\d+", location):  # already a PWSid
            pws_id = location
        else:
            # resolve free-form location to the first PWS in the result
            page = geturl(url=self.locate_url, opts={u"query": location},
                          referer=self.base_url).encode("utf-8")
            xml = ElementTree.fromstring(page)
            pws_id = xml.find(".//pws/station[1]/id").text
        # fetch current conditions for the station
        page = geturl(url=self.pws_url, opts={u"ID": pws_id}).encode("utf-8")
        xml = ElementTree.fromstring(page)
        loc = xml.find("location/full").text
        time = xml.find("observation_time_rfc822").text
        conditions = "N/A"  # PWS feed has no textual conditions field
        temp = float(xml.find("temp_f").text)
        tempstr = xml.find("temperature_string").text
        humidity = xml.find("relative_humidity").text + "%"
        wind = xml.find("wind_string").text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u"error in module %s" % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                  referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        # landed on the search results page: hunt for an exact title match
        container = soup.body.find('div', id='main')
        wanted = self.normalize(name)
        sections = ('Titles (Exact Matches)', 'Popular Titles',
                    'Titles (Partial Matches)')
        match_url = None
        for para in container('p'):
            if para.b is None or para.b.renderContents() not in sections:
                continue
            for anchor in para('a'):
                label = anchor.renderContents()
                if label and self.normalize(label) == wanted:
                    match_url = urljoin(self.imdb_url, anchor['href'])
                    break
            if match_url:
                break
        else:
            raise ValueError('no exact matches')
        soup = BeautifulSoup(geturl(match_url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def personal_station(self, location):
    '''Get current conditions from a personal weather station (PWS).

    location may already be a PWS id, or a free-form query that is
    resolved to the nearest station.  Returns the rendered weather
    string, or an error message on any failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    '''
    try:
        if re.match('[A-Z]{8}\d+', location):  # already a PWSid
            pws_id = location
        else:
            # resolve free-form location to the first PWS in the result
            page = geturl(url=self.locate_url, opts={u'query':location},
                          referer=self.base_url).encode('utf-8')
            xml = ElementTree.fromstring(page)
            pws_id = xml.find('.//pws/station[1]/id').text
        # fetch current conditions for the station
        page = geturl(url=self.pws_url, opts={u'ID':pws_id}).encode('utf-8')
        xml = ElementTree.fromstring(page)
        loc = xml.find('location/full').text
        time = xml.find('observation_time_rfc822').text
        conditions = 'N/A'  # PWS feed has no textual conditions field
        temp = float(xml.find('temp_f').text)
        tempstr = xml.find('temperature_string').text
        humidity = xml.find('relative_humidity').text + '%'
        wind = xml.find('wind_string').text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def rate_imdb(self, name):
    """Get user rating from IMDB"""

    def find_title_url(soup, wanted):
        # scan the result sections for a link whose text normalizes to wanted
        headers = ('Titles (Exact Matches)', 'Popular Titles',
                   'Titles (Partial Matches)')
        for p in soup.body.find('div', id='main')('p'):
            if p.b is not None and p.b.renderContents() in headers:
                for a in p('a'):
                    text = a.renderContents()
                    if text and self.normalize(text) == wanted:
                        return urljoin(self.imdb_url, a['href'])
        raise ValueError('no exact matches')

    page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                  referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        # search page instead of a direct hit: locate the exact match
        url = find_title_url(soup, self.normalize(name))
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def slutrating(phrase):
    """Return the "sluttiness" of phrase as a float in [0, 1].

    Runs the phrase through search with safe-search off and on; the
    score is the fraction of results lost to filtering.  Raises
    WordFiltered if the safe search reports a blocked word.  Returns 0
    when no unfiltered results are found.
    """
    phrase = cleanurl(phrase)
    # result count with safe search off
    try:
        data = geturl(searchURL, opts={u'q': phrase, u'safe': u'off'})
        unsafe = int(match_re.search(data).group(1).replace(u',', u''))
    except AttributeError:
        unsafe = 0
    # result count with safe search on
    try:
        data = geturl(searchURL, opts={u'q': phrase, u'safe': u'active'})
        try:
            # a matched filter notice aborts with WordFiltered
            filtered = filter_re.search(data).group(1)
            raise WordFiltered(filtered)
        except AttributeError:
            pass
        safe = int(match_re.search(data).group(1).replace(u',', u''))
    except AttributeError:
        safe = 0
    # BUG FIX: previously divided by zero when unsafe == 0
    if not unsafe:
        return 0
    value = float(unsafe - safe) / float(unsafe)
    if value < 0:
        value = 0
    return value
def personal_station(self, location):
    '''Get current conditions from a personal weather station (PWS).

    location may already be a PWS id, or a free-form query that is
    resolved to the nearest station.  Returns the rendered weather
    string, or an error message on any failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    '''
    try:
        if re.match('[A-Z]{8}\d+', location):  # already a PWSid
            pws_id = location
        else:
            # resolve free-form location to the first PWS in the result
            page = geturl(url=self.locate_url, opts={
                u'query': location
            }, referer=self.base_url).encode('utf-8')
            xml = ElementTree.fromstring(page)
            pws_id = xml.find('.//pws/station[1]/id').text
        # fetch current conditions for the station
        page = geturl(url=self.pws_url, opts={
            u'ID': pws_id
        }).encode('utf-8')
        xml = ElementTree.fromstring(page)
        loc = xml.find('location/full').text
        time = xml.find('observation_time_rfc822').text
        conditions = 'N/A'  # PWS feed has no textual conditions field
        temp = float(xml.find('temp_f').text)
        tempstr = xml.find('temperature_string').text
        humidity = xml.find('relative_humidity').text + '%'
        wind = xml.find('wind_string').text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    opts = {'s': 'tt', 'q': name, 'exact': 'true'}
    soup = BeautifulSoup(geturl(self.imdb_search, opts, referer=self.imdb_url))
    if soup.title.renderContents() == 'Find - IMDb':
        # search results page: follow the first hit in the find-results table
        hit = soup.body.find('table', 'findList').tr.find('td', 'result_text').a
        soup = BeautifulSoup(geturl(urljoin(self.imdb_url, hit['href']),
                                    referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def response(self, nick, args, kwargs): try: query = args[0] doc = geturl(self.search, opts={u'verbose': u'on', u'name': query}) drink = self.drink.search(doc).group(1) url = urljoin(self.baseurl, drink) doc = geturl(url) title = self.title.search(doc).group(1) ingredients = self.ingredients.findall(doc) instructions = self.instructions.search(doc).group(1) response = strip_html(u'%s - %s - %s' % (title, u', '.join(ingredients), instructions)) except Exception, error: response = u"That's a made-up drink, sorry."
def response(self, nick, args, kwargs):
    """Return an entry from the requested source.

    args[0] selects the source; args[1] (optional) is either an entry
    number or a search query.  With neither, a random entry is fetched.
    """
    source = self.sources[args[0]]
    # BUG FIX: narrowed two bare ``except:`` clauses that silently
    # swallowed everything (including KeyboardInterrupt/SystemExit)
    try:
        query = args[1]
    except IndexError:
        query = None
    try:
        num = int(query)
        query = None
    except (TypeError, ValueError):
        # query is None or not numeric: treat it as a search string
        num = None
    if num:
        url = source.bynum.replace(u'num', unicode(num))
        opts = None
    elif query:
        url = source.search
        opts = dict(source.opts)
        opts[source.query] = query
    else:
        url = source.random
        opts = None
    doc = geturl(url, opts=opts)
    entries = source.entries.findall(doc)
    if query:
        entries = filter(None, entries)
    entry = random.choice(entries)
    return '\n'.join(filter(None, strip_html(entry).strip().splitlines()))
def response(self, nick, args, kwargs):
    """Run a yelp search and render the top business result."""
    desc, loc = args
    # '@category' queries make no sense without an explicit location
    if desc.startswith('@') and not loc:
        raise Exception('invalid search')
    if not loc:
        # fall back to the caller's learned location, then the default
        if self.learn:
            loc = self.learn.lookup(u'location', nick)
        if not loc:
            loc = self.default_location
    opts = {'find_desc': desc, 'ns': 1, 'find_loc': loc, 'rpp': 1}
    page = geturl(SEARCHURL, opts)
    # pull the interesting fields out of the first (only) result
    soup = BeautifulSoup(page, convertEntities='html')
    hit = soup.body.find('div', 'businessresult clearfix')
    title_link = hit.find('a', id='bizTitleLink0')
    name = clean_re.search(u''.join(title_link.findAll(text=True))).group(1)
    cat = hit.find('div', 'itemcategories').a.renderContents()
    rating = hit.find('div', 'rating').img['alt'].replace(' star rating', '')
    reviews = hit.find('span', 'reviews').renderContents()
    parts = [piece.strip() for piece in hit.address.findAll(text=True)]
    address = u', '.join(piece for piece in parts if piece)
    url = urljoin(BASEURL, title_link['href'])
    return RESULT_FMT % {'nick': nick, 'name': name, 'cat': cat,
                         'rating': rating, 'reviews': reviews,
                         'address': address, 'url': url}
def translate(self, text, src, dst):
    """Perform the translation"""
    params = {'client': 't', 'text': text, 'hl': 'en',
              'sl': self.langs[src], 'tl': self.langs[dst],
              'ie': 'UTF-8', 'oe': 'UTF-8', 'multires': '0',
              'prev': 'btn', 'ssel': '4', 'tsel': '4', 'sc': '1'}
    url = 'http://translate.google.com/translate_a/t'
    raw = geturl(url, params)
    # the endpoint emits sparse JSON arrays (",,"); patch in empty strings
    while u',,' in raw:
        raw = raw.replace(u',,', u',"",')
    result = simplejson.loads(raw)
    try:
        detected = self.lookup[result[2]].capitalize()
    except StandardError:
        detected = None
    # the payload is nested lists; drill down to the first leaf
    while isinstance(result, list) and result:
        result = result[0]
    if src == 'auto' and detected:
        result = u'[detected %s] %s' % (detected, result)
    if result:
        return result
def response(self, nick, args, kwargs):
    """Scrape the latest FAIL headline and the FIX that follows it."""
    headline = BeautifulSoup(geturl(self.url)).h1
    fail_text = self.fail_re.search(headline.renderContents()).group(1)
    fix_text = self.fail_re.search(
        headline.findNext('h1').renderContents()).group(1)
    message = u'%s: %s: %s %s: %s' % (nick,
                                      self.col('red', text='FAIL'),
                                      fail_text,
                                      self.col('green', text='FIX'),
                                      fix_text)
    return self.spaces_re.sub(' ', strip_html(message))
def request(self, url, parameters):
    """Build, sign and fetch an OAuth GET request for url."""
    req = oauth.Request.from_consumer_and_token(
        self.consumer, token=self.token, http_method='GET',
        http_url=url, parameters=parameters)
    req.sign_request(self.method, self.consumer, self.token)
    # '+' must be percent-encoded or the signature won't verify
    signed_url = req.to_url().replace('+', '%20')
    return geturl(signed_url)
def time(self): try: doc = geturl(DoomsDay._url) time = self._re_time.search(doc).group(1) return time except Exception, error: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(error) return u'UNKNOWN'
def translate(self, text, src, dst):
    """Perform the translation"""
    opts = {'langpair': '%s|%s' % (self.langs[src], self.langs[dst]),
            'v': '1.0',
            'q': text}
    payload = simplejson.loads(geturl(self.url, opts))['responseData']
    result = strip_html(payload['translatedText'])
    # prefix the detected source language when the API reports one we know
    try:
        detected = self.lookup[payload['detectedSourceLanguage']].capitalize()
    except KeyError:
        pass
    else:
        result = u'[detected %s] %s' % (detected, result)
    return result
def level(self): try: doc = geturl(Terror._url) level = self._re_level.search(doc).group(1) color = self._color_map[level.lower()] return self.colorlib.get_color(color, text=level) except Exception, error: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(error) return u'UNKNOWN'
def forecast(self, location): '''get weather forecast''' try: page = geturl(url=self.forecast_url, opts={u'query':location}).encode('utf-8') xml = ElementTree.fromstring(page) text = strip_html(xml.find('.//fcttext').text) except Exception, e: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(e) return "error looking up forecast for location: %s" % location
def forecast(self, location): """get weather forecast""" try: page = geturl(url=self.forecast_url, opts={u"query": location}).encode("utf-8") xml = ElementTree.fromstring(page) text = strip_html(xml.find(".//fcttext").text) except Exception, e: self.log.warn(u"error in module %s" % self.__module__) self.log.exception(e) return "error looking up forecast for location: %s" % location
def response(self, nick, args, kwargs):
    """Scrape the latest FAIL headline and its follow-up FIX headline."""
    first_h1 = BeautifulSoup(geturl(self.url)).h1
    second_h1 = first_h1.findNext('h1')
    rendered = u'%s: %s: %s %s: %s' % (
        nick,
        self.col('red', text='FAIL'),
        self.fail_re.search(first_h1.renderContents()).group(1),
        self.col('green', text='FIX'),
        self.fail_re.search(second_h1.renderContents()).group(1))
    return self.spaces_re.sub(' ', strip_html(rendered))
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    search_opts = {'s': 'tt', 'q': name, 'exact': 'true'}
    page = geturl(self.imdb_search, search_opts, referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'Find - IMDb':
        # we got the search page, not the title page: take the top result
        results = soup.body.find('table', 'findList')
        first = results.tr.find('td', 'result_text').a
        title_url = urljoin(self.imdb_url, first['href'])
        soup = BeautifulSoup(geturl(title_url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def response(self, nick, args, kwargs):
    """Return one random confession scraped from the site."""
    soup = BeautifulSoup(geturl(self.random, add_headers={'Accept': '*/*'}))
    content = soup.find(u'div', attrs={u'id': u'main'})
    posts = content.findAll(u'div', attrs={u'class': u'content'})
    chosen = random.choice(posts)
    # flatten the post's paragraphs into one plain-text line
    joined = u' '.join(unicode(p) for p in chosen.findAll(u'p'))
    return strip_html(joined).strip()
def bodycount(self): try: doc = geturl(self._bodycount_url) data = self._bodycount_re.search(doc).group(1) data = decode(data, 'ascii') data = strip_html(data) data = self._re_whitespace.sub(u' ', data) data = data.strip() return data except Exception, error: self.log.warn(u'error in module %s' % self.__module__) self.log.exception(error) return u'UNKNOWN'
def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                  referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        # search page: scan the result tables for an exact (normalized) match
        wanted = self.normalize(name)
        url = None
        for para in soup.body.find('div', id='main')('p', style=None):
            for row in para.table('tr'):
                link = row('td')[2].a
                candidate = self.normalize(link.renderContents())
                self.log.debug('Comparing: %r == %r', candidate, wanted)
                if candidate == wanted:
                    url = urljoin(self.imdb_url, link['href'])
                    break
            if url:
                break
        else:
            raise ValueError('no exact matches')
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = strip_html(soup.find('span', 'rating-rating').renderContents())
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating
def response(self, nick, args, kwargs):
    """Return a random (or requested) piece of ASCII art.

    With no query a random page is fetched; otherwise the query is
    slugified into a page name.  Returns the art framed by its title.
    """
    query = args[0]
    if query is None or query == u'':
        url = self.random_url
    else:
        query = u' '.join(query.split())
        query = query.replace(u' ', u'_')
        # BUG FIX: urllib.quote() cannot handle non-ASCII unicode; encode
        # to utf-8 first (consistent with the sibling joke handlers)
        query = urllib.quote(query.encode('utf-8')) + u'.html'
        url = urljoin(self.baseurl, query)
    doc = geturl(url)
    results = self.artfart.findall(doc)
    title, art = random.choice(results)
    return u'>>> %s <<<\n%s' % (title, strip_html(art))
def get_soup(self, query):
    """Fetch the wiki page matching query (u'random' for a random page)
    and return (soup, title) with noisy markup pruned."""
    if isinstance(query, (list, tuple)):
        query = u' '.join(query)
    # choose the random-page URL or a search request
    if query == u'random':
        opts = {}
        url = urljoin(self.base_url, self.random_path)
    else:
        opts = {u'search': query, u'go': u'Go'}
        url = urljoin(self.base_url, self.search_path)
    page = geturl(url, referer=self.base_url, opts=opts,
                  size=self.sample_size)
    soup = BeautifulSoup(page)
    # page title minus the site advert suffix
    title = soup.title.string.replace(self.advert, u'')
    # drop tables (sidebars / tabular data)
    for node in soup.findAll(u'table'):
        node.extract()
    # drop disambiguation links
    for node in soup.findAll(u'div', attrs={u'class': u'dablink'}):
        node.extract()
    # drop latitude/longitude metadata for places
    for node in soup.findAll(u'span', attrs={u'id': u'coordinates'}):
        node.extract()
    # drop non-english content wrappers
    for node in soup.findAll(u'span', attrs={u'lang': True}):
        node.extract()
    # drop IPA pronunciation guides and audio links
    for node in soup.findAll(u'span', attrs={u'class': u'IPA'}):
        node.extract()
    for node in soup.findAll(u'a', text=u'IPA'):
        node.extract()
    for node in soup.findAll(u'span', attrs={u'class': Wiki._audio}):
        node.extract()
    return soup, title
def response(self, nick, args, kwargs):
    """Return a joke: random with no query, else the named joke page."""
    query = args[0]
    if query is None or query == u'':
        url = self.random_url
    else:
        # slugify the query into a page name
        slug = u'_'.join(query.split())
        slug = urllib.quote(encode(slug, 'utf-8')) + u'.php'
        url = urljoin(self.baseurl, slug)
    doc = geturl(url)
    text = strip_html(self.joke.findall(doc)[0])
    # cleanup output a bit.. some funny whitespace in it -cj
    text = text.replace(u'\x14', u' ').replace(u'\n', u' ')
    return re.sub(r'\s{2,}', u' ', text).strip()
def official_station(self, location):
    '''Get current conditions from an official station (typically an
    airport) matching location.

    Returns the rendered weather string, or an error message on any
    failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    '''
    try:
        page = geturl(url=self.station_url,
                      opts={u'query':location}).encode('utf-8')
        xml = ElementTree.fromstring(page)
        loc = xml.find('display_location/full').text
        time = xml.find('local_time').text
        conditions = xml.find('weather').text
        temp = float(xml.find('temp_f').text)
        tempstr = xml.find('temperature_string').text
        humidity = xml.find('relative_humidity').text
        wind = xml.find('wind_string').text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def response(self, nick, args, kwargs):
    """Return a joke: random with no query, else the named joke page."""
    query = args[0]
    if query is None or query == u'':
        url = self.random_url
    else:
        # normalize whitespace, slugify, and build the page URL
        slug = u'_'.join(query.split())
        slug = slug.encode('utf-8', 'replace')
        url = urljoin(self.baseurl, urllib.quote(slug) + u'.php')
    doc = geturl(url)
    text = strip_html(self.joke.findall(doc)[0])
    # cleanup output a bit.. some funny whitespace in it -cj
    text = text.replace(u'\x14', u' ')
    text = text.replace(u'\n', u' ')
    text = re.sub(r'\s{2,}', u' ', text)
    return text.strip()
def official_station(self, location):
    """Get current conditions from an official station (typically an
    airport) matching location.

    Returns the rendered weather string, or an error message on any
    failure.

    NOTE(review): locals() is handed to _format_weather(), so the local
    variable names (loc, time, conditions, temp, tempstr, humidity,
    wind) are the interface -- do not rename without checking
    _format_weather.
    """
    try:
        page = geturl(url=self.station_url,
                      opts={u"query": location}).encode("utf-8")
        xml = ElementTree.fromstring(page)
        loc = xml.find("display_location/full").text
        time = xml.find("local_time").text
        conditions = xml.find("weather").text
        temp = float(xml.find("temp_f").text)
        tempstr = xml.find("temperature_string").text
        humidity = xml.find("relative_humidity").text
        wind = xml.find("wind_string").text
        return self._format_weather(locals())
    except Exception, e:
        self.log.warn(u"error in module %s" % self.__module__)
        self.log.exception(e)
        return "error looking up conditions for location: %s" % location
def response(self, nick, args, kwargs):
    """Scrape and render the projected 2010 Senate seat counts."""
    page = geturl(self._baseurl)
    try:
        scoreboard = self._score_re.search(page).group(1)
        dem = self._dem_re.search(scoreboard).groups()
        gop = self._gop_re.search(scoreboard).groups()
        # XXX diebold patch :D
        #dem, gop = (dem[0], gop[1]), (gop[0], dem[1])
        tie = None
        try:
            tie = self._tie_re.search(scoreboard).groups()
        except AttributeError:
            # no toss-up seats reported; that's fine
            pass
    except AttributeError:
        raise Exception(u"couldn't parse page")
    parts = [self.colorize(u'blue', *dem), self.colorize(u'red', *gop)]
    if tie:
        parts.append(self.colorize(u'white', *tie))
    return u'%s: Projected Senate Seats 2010: %s' % (nick, u', '.join(parts))
def get_quote(self, symbols):
    """Looks up the symbol from finance.yahoo.com, returns formatted result"""
    encoded = [quote(sym) for sym in symbols.split()]
    page = geturl(Yahoo._quote_url.replace(u'SYMBOL', "+".join(encoded)))
    results = []
    for line in page.splitlines():
        row = csv.reader([line]).next()
        symbol = row[0]
        name = row[1]
        trade_time, last_trade = strip_html(row[3]).split(" - ")
        last_trade = locale.atof(last_trade)
        # a non-numeric "last close" marks an exchange/index row
        try:
            last_close = locale.atof(row[2])
            exchange = False
        except ValueError:
            last_close = last_trade
            exchange = True
        if trade_time == "N/A":
            trade_time = u'market close'
        if exchange:
            results.append(u'%s (%s) - %s: %.4f' % (name, symbol,
                                                    trade_time, last_trade))
            continue
        delta = last_trade - last_close
        try:
            delta_perc = delta * 100.0 / last_close
        except ZeroDivisionError:
            delta_perc = 0.00
        if delta < 0:
            color = u'red'
        elif delta > 0:
            color = u'green'
        else:
            color = u'white'
        text = self.colorlib.get_color(
            color, text=u'%.2f (%+.2f %+.2f%%)' % (last_trade, delta,
                                                   delta_perc))
        results.append(u'%s (%s) - Open: %.2f | %s: %s' % (name, symbol,
                                                           last_close,
                                                           trade_time, text))
    return u'\n'.join(results)
def gettop(self):
    """Get box office ratings"""
    soup = BeautifulSoup(geturl(self.imdb_top))
    rows = soup.body.find('div', id='boxoffice').table('tr')[1:]
    movies = []
    for row in rows:
        cells = row('td')
        movies.append({'title': strip_html(cells[2].a.renderContents()),
                       'weekend': cells[3].renderContents().strip(),
                       'gross': cells[4].renderContents().strip()})
    # column widths for aligned output
    width_title = max(len(movie['title']) for movie in movies)
    width_weekend = max(len(movie['weekend']) for movie in movies)
    width_gross = max(len(movie['gross']) for movie in movies)
    lines = ['# / Title / Weekend / Gross']
    for rank, movie in enumerate(movies, 1):
        lines.append('%s %s - %s / %s' % (str(rank).rjust(2),
                                          movie['title'].ljust(width_title),
                                          movie['weekend'].ljust(width_weekend),
                                          movie['gross'].ljust(width_gross)))
    return '\n'.join(lines)
def gettop(self):
    """Get box office ratings"""
    soup = BeautifulSoup(geturl(self.imdb_top))
    table = soup.body.find('div', id='boxoffice').table
    # skip the header row, keep (title, weekend, gross) per movie
    data = [{'title': strip_html(row('td')[2].a.renderContents()),
             'weekend': row('td')[3].renderContents().strip(),
             'gross': row('td')[4].renderContents().strip()}
            for row in table('tr')[1:]]
    tsize = max(len(item['title']) for item in data)
    wsize = max(len(item['weekend']) for item in data)
    gsize = max(len(item['gross']) for item in data)
    output = ['# / Title / Weekend / Gross']
    for i, item in enumerate(data):
        output.append('%s %s - %s / %s' % (str(i + 1).rjust(2),
                                           item['title'].ljust(tsize),
                                           item['weekend'].ljust(wsize),
                                           item['gross'].ljust(gsize)))
    return '\n'.join(output)
def translate(self, text, src, dst):
    """Perform the translation"""
    # BUG FIX (behavior-preserving): the original dict literal repeated
    # the 'dt' key ten times; Python silently keeps only the last value
    # ('t'), so the other nine were dead code.  The API presumably wants
    # them as repeated query params -- TODO confirm whether geturl/opts
    # can send a key multiple times; until then only 'dt': 't' is sent,
    # exactly as before.
    opts = {'q': text, 'client': 't', 'sl': 'en', 'tl': 'es', 'hl': 'en',
            'dt': 't',
            'ie': 'UTF-8', 'oe': 'UTF-8', 'ssel': '3', 'tsel': '3',
            'kc': '0', 'tk': '286462.160648'}
    url = 'http://translate.google.com/translate_a/single'
    res = geturl(url, opts, referer='https://translate.google.com/')
    # the endpoint emits sparse JSON arrays (",,"); patch in empty strings
    while u',,' in res:
        res = res.replace(u',,', u',"",')
    res = simplejson.loads(res)
    try:
        det = self.lookup[res[2]].capitalize()
    except StandardError:
        det = None
    # the payload is nested lists; drill down to the first leaf
    while isinstance(res, list) and res:
        res = res[0]
    if src == 'auto' and det:
        res = u'[detected %s] %s' % (det, res)
    if res:
        return res
def _response(self, nick, args, kwargs):
    """Run a yelp search and render the top business result."""
    desc, loc = args
    # '@category' queries require an explicit location
    if desc.startswith('@') and not loc:
        raise Exception('invalid search')
    if not loc:
        loc = self.learn.lookup(u'location', nick) if self.learn else None
        if not loc:
            loc = self.default_location
    page = geturl(SEARCHURL, {'find_desc': desc, 'ns': 1,
                              'find_loc': loc, 'rpp': 1})
    soup = BeautifulSoup(page, convertEntities='html')
    business = soup.body.find('div', 'businessresult clearfix')
    link = business.find('a', id='bizTitleLink0')
    raw_name = u''.join(link.findAll(text=True))
    name = clean_re.search(raw_name).group(1)
    cat = business.find('div', 'itemcategories').a.renderContents()
    stars = business.find('div', 'rating').img['alt']
    rating = stars.replace(' star rating', '')
    reviews = business.find('span', 'reviews').renderContents()
    address_bits = [bit.strip() for bit in business.address.findAll(text=True)]
    address = u', '.join(bit for bit in address_bits if bit)
    url = urljoin(BASEURL, link['href'])
    return RESULT_FMT % {'nick': nick,
                         'name': name,
                         'cat': cat,
                         'rating': rating,
                         'reviews': reviews,
                         'address': address,
                         'url': url}
def _getpage(self, url, opts=None):
    """Fetch a wiki page and return (soup, title) with noisy markup removed."""
    page = geturl(url, referer=self.baseurl, opts=opts)
    # HTMLParser doesn't handle this very well.. see:
    # http://www.crummy.com/software/BeautifulSoup/3.1-problems.html
    soup = BeautifulSoup(self.scripts_re.sub('', page))
    title = soup.title.string
    if self.advert and self.advert in title:
        title = title.replace(self.advert, '')
    # drop tables (sidebars / tabular data)
    for node in soup.findAll('table'):
        node.extract()
    # drop disambiguation links
    for node in soup.findAll('div', 'dablink'):
        node.extract()
    # drop latitude/longitude metadata for places
    for node in soup.findAll('span', id='coordinates'):
        node.extract()
    # drop non-english content wrappers
    for node in soup.findAll('span', lang=True):
        node.extract()
    # drop IPA pronunciation guides and audio links
    for node in soup.findAll('span', 'IPA'):
        node.extract()
    for node in soup.findAll('a', text='IPA'):
        node.extract()
    for node in soup.findAll('span', 'audiolink'):
        node.extract()
    return soup, title
def response(self, nick, args, kwargs):
    """Fetch a random joke from the JSON API, swapping quotes for IRC."""
    payload = json.loads(geturl(self.baseurl))
    joke = payload['value']['joke']
    return joke.replace('"', "'")