def google_news (self, e):
    """Fetch the newest Google News headline into e.output.

    e.input is an optional search term; empty input falls back to the
    top US headlines feed. Returns e with output set to
    "<title> - <description> [ <short url> ]".
    """
    query = urllib.quote(e.input)
    url = ""
    if not query:
        # no search term: use the top-stories RSS feed
        url = "http://news.google.com/news?ned=us&topic=h&output=rss"
    else:
        url = "http://news.google.com/news?q=%s&output=rss" % query
    dom = xml.dom.minidom.parse(urllib2.urlopen(url))
    newest_news = dom.getElementsByTagName('item')[0]
    title = newest_news.getElementsByTagName('title')[0].childNodes[0].data
    description = BeautifulSoup(newest_news.getElementsByTagName('description')[0].childNodes[0].data)
    # strip anchor tags and the grey (#6f6f6f) byline markup from the blurb
    links = description.findAll('a')
    for link in links:
        link.extract()
    links = description.findAll(color='#6f6f6f')
    for link in links:
        link.extract()
    description = str(description).strip().decode("utf-8", 'ignore')
    description = tools.remove_html_tags(description)
    description = tools.decode_htmlentities(description)
    # drop the trailing 9-character suffix, then cut at the last full stop
    description = description[0:len(description) - 9]
    if description.rfind(".")!=-1:
        description = description[0:description.rfind(".")+1]
    link = tools.shorten_url(newest_news.getElementsByTagName('link')[0].childNodes[0].data)
    e.output = "%s - %s [ %s ]" % (title.encode("utf-8", 'ignore'), description.encode("utf-8", 'ignore'), link.encode("utf-8", 'ignore'))
    return e
def google_news(self, e):
    """Look up the most recent Google News item and write a one-line
    summary ("<title> - <description> [ <short url> ]") to e.output.

    An empty e.input selects the top US headlines feed instead of a
    query-specific feed.
    """
    encoded = urllib.quote(e.input)
    if encoded:
        feed = "http://news.google.com/news?q=%s&output=rss" % encoded
    else:
        feed = "http://news.google.com/news?ned=us&topic=h&output=rss"
    tree = xml.dom.minidom.parse(urllib2.urlopen(feed))
    item = tree.getElementsByTagName('item')[0]
    headline = item.getElementsByTagName('title')[0].childNodes[0].data
    blurb = BeautifulSoup(item.getElementsByTagName('description')[0].childNodes[0].data)
    # remove anchors and the grey source/byline markup before flattening to text
    for node in blurb.findAll('a'):
        node.extract()
    for node in blurb.findAll(color='#6f6f6f'):
        node.extract()
    text = str(blurb).strip().decode("utf-8", 'ignore')
    text = tools.remove_html_tags(text)
    text = tools.decode_htmlentities(text)
    # drop the trailing 9-character suffix, then cut at the last full stop
    text = text[0:len(text) - 9]
    stop = text.rfind(".")
    if stop != -1:
        text = text[0:stop + 1]
    short = tools.shorten_url(item.getElementsByTagName('link')[0].childNodes[0].data)
    e.output = "%s - %s [ %s ]" % (headline.encode("utf-8", 'ignore'),
                                   text.encode("utf-8", 'ignore'),
                                   short.encode("utf-8", 'ignore'))
    return e
def gwiki(bot, e):
    """Search Google's AJAX API for a Wikipedia page matching e.input.

    When the top result is a wikipedia.org/wiki/ URL, e.output is set to
    "<snippet> [ <short url> ]"; otherwise e.output is left untouched.
    Returns e either way.
    """
    url = ('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:wikipedia.org+'
           + urllib.parse.quote(e.input))
    # the AJAX search API requires a Referer header
    request = urllib.request.Request(url, None, {'Referer': 'http://irc.00id.net'})
    response = urllib.request.urlopen(request)
    results_json = json.loads(response.read().decode('utf-8'))
    results = results_json['responseData']['results']
    result = results[0]
    # NOTE: the "." chars are unescaped regex wildcards; harmless in
    # practice because results are already site-restricted to wikipedia.org
    if re.search("wikipedia.org/wiki/", result['url']):
        url = result['url']
        # the API double-escapes percent signs; undo that before shortening
        url = tools.shorten_url(url.replace('%25', '%'))
        content = tools.decode_htmlentities(
            tools.remove_html_tags(result['content']))
        content = re.sub(r'\s+', ' ', content)  # collapse whitespace runs
        content = content.replace("...", "")
        e.output = "%s [ %s ]" % (content, url)
    return e
def get_rt(self, e):
    """Look up e.input on the Rotten Tomatoes API and set e.output to a
    one-line summary with critic/audience scores and a shortened link."""
    # single-result query against the RT public API
    url = "http://api.rottentomatoes.com/api/public/v1.0/movies.json?apikey=%s&q=%s&page_limit=1" % (tools.config.rtAPIkey, urllib2.quote(e.input) )
    response = urllib2.urlopen(url).read()
    movie = json.loads(response)
    movie = movie['movies'][0]
    concensus = ""
    # the consensus field is optional in the API payload
    if 'critics_consensus' in movie:
        concensus = "- " + movie['critics_consensus']
    url = tools.shorten_url(movie['links']['alternate'])
    e.output = "%s (%s) - Critics: %s - Users: %s %s [ %s ]" % (movie['title'], str(movie['year']), str(movie['ratings']['critics_score']), str(movie['ratings']['audience_score']), concensus, url )
    return e
def get_rt(self, e):
    """Query the Rotten Tomatoes API for e.input and write a rating
    summary line (title, year, scores, consensus, short link) to e.output."""
    api_url = ("http://api.rottentomatoes.com/api/public/v1.0/movies.json"
               "?apikey=%s&q=%s&page_limit=1") % (tools.config.rtAPIkey,
                                                  urllib2.quote(e.input))
    data = json.loads(urllib2.urlopen(api_url).read())
    top = data['movies'][0]
    # consensus text is optional in the payload
    consensus_part = "- " + top['critics_consensus'] if 'critics_consensus' in top else ""
    short = tools.shorten_url(top['links']['alternate'])
    ratings = top['ratings']
    e.output = "%s (%s) - Critics: %s - Users: %s %s [ %s ]" % (
        top['title'], str(top['year']), str(ratings['critics_score']),
        str(ratings['audience_score']), consensus_part, short)
    return e
def get_weather_alert_data(alert_url): try: request = urllib2.urlopen(alert_url) dom = xml.dom.minidom.parse(request) msgType = dom.getElementsByTagName('msgType')[0].childNodes[0].data note = dom.getElementsByTagName('description')[0].childNodes[0].data note = note.replace("\n", " ") pattern = re.compile("\s+") note = pattern.sub(" ", note) ##turning off the text for now because its too much spam note = "" event = dom.getElementsByTagName('event')[0].childNodes[0].data urgency = dom.getElementsByTagName('urgency')[0].childNodes[0].data severity = dom.getElementsByTagName('severity')[0].childNodes[0].data certainty = dom.getElementsByTagName('certainty')[0].childNodes[0].data senderName = dom.getElementsByTagName( 'senderName')[0].childNodes[0].data ## Use the "effective" value because "sent" changes every time ## the document is retrieved updated = dom.getElementsByTagName('effective')[0].childNodes[0].data updated = dateparser(updated) updated = (updated - updated.utcoffset()).replace(tzinfo=None) ago = (datetime.datetime.utcnow() - updated).seconds / 60 short_url = tools.shorten_url(alert_url) ## old text, too verbose ##alert_text = "[%s] %s: %s Urgency: %s Severity: %s Certainty: %s | %s (%s minutes ago)" % (senderName, msgType, event, urgency, severity, certainty, note[0:170], ago) ## new text is self limiting to the IRC limit of 428 characters alert_text_start = "[%s] %s: %s" % (senderName, msgType, event) alert_text_end = "(%s minutes ago) [ %s ]" % (ago, short_url) alert_text = "%s | %s %s" % (alert_text_start, note[:425 - ( len(alert_text_start + alert_text_end))], alert_text_end) return alert_text except Exception as inst: print "get_weather_alert_data: " + str(inst) pass
def get_weather_alert_data(alert_url):
    """Fetch a CAP weather-alert XML document and build a one-line,
    IRC-sized summary string; returns None on any error (printed, not raised)."""
    try:
        request = urllib2.urlopen(alert_url)
        dom = xml.dom.minidom.parse(request)
        msgType = dom.getElementsByTagName('msgType')[0].childNodes[0].data
        note = dom.getElementsByTagName('description')[0].childNodes[0].data
        note = note.replace("\n"," ")
        pattern = re.compile("\s+")
        note = pattern.sub(" ", note)
        ##turning off the text for now because its too much spam
        note = ""
        event = dom.getElementsByTagName('event')[0].childNodes[0].data
        urgency = dom.getElementsByTagName('urgency')[0].childNodes[0].data
        severity = dom.getElementsByTagName('severity')[0].childNodes[0].data
        certainty = dom.getElementsByTagName('certainty')[0].childNodes[0].data
        senderName = dom.getElementsByTagName('senderName')[0].childNodes[0].data
        ## Use the "effective" value because "sent" changes every time
        ## the document is retrieved
        updated = dom.getElementsByTagName('effective')[0].childNodes[0].data
        updated = dateparser(updated)
        # normalize to naive UTC so it can be compared with utcnow()
        updated = (updated - updated.utcoffset()).replace(tzinfo=None)
        # NOTE(review): timedelta.seconds wraps for alerts older than one
        # day; total_seconds() would be correct — confirm intent.
        ago = (datetime.datetime.utcnow() - updated).seconds/60
        short_url = tools.shorten_url(alert_url)
        ## old text, too verbose
        ##alert_text = "[%s] %s: %s Urgency: %s Severity: %s Certainty: %s | %s (%s minutes ago)" % (senderName, msgType, event, urgency, severity, certainty, note[0:170], ago)
        ## new text is self limiting to the IRC limit of 428 characters
        alert_text_start = "[%s] %s: %s" % (senderName, msgType, event)
        alert_text_end = "(%s minutes ago) [ %s ]" % (ago, short_url)
        alert_text = "%s | %s %s" % (alert_text_start, note[:425-(len(alert_text_start+alert_text_end))], alert_text_end)
        return alert_text
    except Exception as inst:
        print "get_weather_alert_data: " + str(inst)
        pass
def get_urbandictionary_wotd(): url = "http://www.urbandictionary.com" try: opener = urllib2.build_opener() opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)")] pagetmp = opener.open(url) page = pagetmp.read() opener.close() page = BeautifulSoup(page) first_definition = "" first_word = page.findAll('div', attrs={"class": "word" })[0].contents[1].contents[0].string first_word = first_word.encode("utf-8", 'ignore') for content in page.findAll('div', attrs={"class": "definition"})[0].contents: if content.string != None: first_definition += content.string first_definition = first_definition.encode("utf-8", 'ignore') first_definition = tools.decode_htmlentities( first_definition.decode("utf-8", 'ignore')).encode("utf-8", 'ignore') first_definition = first_definition.replace("\n", " ") wotd = (first_word.decode('utf-8') + ": " + first_definition.decode('utf-8') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore') return wotd except: print "!ud wotd went wrong" return
def gwiki(bot, e):
    """Find a Wikipedia article for e.input via Google's AJAX search API.

    Sets e.output to "<snippet> [ <short url> ]" when the top hit is a
    wikipedia.org/wiki/ page; otherwise e.output is left unchanged.
    """
    url = ('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=site:wikipedia.org+' +
           urllib.parse.quote(e.input))
    # the AJAX search API requires a Referer header
    request = urllib.request.Request(url, None, {'Referer': 'http://irc.00id.net'})
    response = urllib.request.urlopen(request)
    results_json = json.loads(response.read().decode('utf-8'))
    results = results_json['responseData']['results']
    regexstring = "wikipedia.org/wiki/"
    result = results[0]
    m = re.search(regexstring,result['url'])
    if (m):
        url = result['url']
        # undo the API's double percent-escaping before shortening
        url = tools.shorten_url(url.replace('%25','%'))
        #content = result['content'].encode('utf-8')
        content = tools.decode_htmlentities(tools.remove_html_tags(result['content']))
        # collapse whitespace runs into single spaces
        content = re.sub('\s+', ' ', content)
        content = content.replace("...", "")
        #print content
        #content = content.decode('unicode-escape')
        #e.output = content
        e.output = "%s [ %s ]" % (content, url)
    return e
def advocate_beer(self, e): query = e.input # get the name, rating and style of a beer from beeradvocate.com url = tools.google_url("site:beeradvocate.com " + query, "/beer/profile/[0-9]*?/[0-9]+") # url = "http://beeradvocate.com/beer/profile/306/1212/" socket.setdefaulttimeout(30) try: beerpage = urllib.request.urlopen(url).read().decode("utf-8") except: return None socket.setdefaulttimeout(10) titlestart = beerpage.find("<title>") + 7 titleend = beerpage.find(" - ", titlestart) beertitle = beerpage[titlestart:titleend] score_start_tag = '<span class="BAscore_big">' score_end_tag = "Reviews</td>" start = beerpage.find(score_start_tag) + len(score_start_tag) score_line = beerpage[start : start + 100] find_start_tag = '</span>\n<br><a href="/help/index?topic=ratings"><b>' find_end_tag = "</b></a>\n<br>-<br>" # print score_line grade = score_line[0 : score_line.find(find_start_tag)] # print "\n" + grade grade_wording = score_line[score_line.find(find_start_tag) + len(find_start_tag) : score_line.rfind(find_end_tag)] # print grade_wording if grade_wording == "": grade_wording = "N/A" find_start_tag = find_end_tag find_end_tag = "</td>" num_reviews = score_line[score_line.rfind(find_start_tag) + len(find_start_tag) : score_line.find(find_end_tag)] # print num_reviews find_start_tag = "Style | ABV" style_line = beerpage[beerpage.find(find_start_tag) : beerpage.find(find_start_tag) + 120] find_start_tag = "><b>" find_end_tag = "</b></a> | " style = style_line[style_line.find(find_start_tag) + len(find_start_tag) : style_line.find(find_end_tag)] find_start_tag = find_end_tag find_end_tag = "% <a href" abv = style_line[style_line.find(find_start_tag) + len(find_start_tag) : style_line.find(find_end_tag) + 1] response_string = "Beer: %s - Grade: %s [%s, %s] Style: %s ABV: %s [ %s ]" % ( beertitle, grade, grade_wording, num_reviews, style, abv, tools.shorten_url(url), ) e.output = response_string return e
def get_urbandictionary_wotd():
    """Scrape Urban Dictionary's front page for the word of the day.

    Returns a UTF-8 encoded "word: definition [ url ]" string, or None on
    any failure.
    """
    url = "http://www.urbandictionary.com"
    try:
        opener = urllib2.build_opener()
        # spoofed user agent; UD rejects the urllib default
        opener.addheaders = [('User-Agent',"Opera/9.10 (YourMom 8.0)")]
        pagetmp = opener.open(url)
        page = pagetmp.read()
        opener.close()
        page = BeautifulSoup(page)
        first_definition = ""
        first_word = page.findAll('div',attrs={"class" : "word"})[0].contents[1].contents[0].string
        first_word = first_word.encode("utf-8", 'ignore')
        # the definition text is split over several child nodes; join the
        # plain-text ones
        for content in page.findAll('div',attrs={"class" : "definition"})[0].contents:
            if content.string != None:
                first_definition += content.string
        first_definition = first_definition.encode("utf-8", 'ignore')
        first_definition = tools.decode_htmlentities(first_definition.decode("utf-8", 'ignore')).encode("utf-8", 'ignore')
        first_definition = first_definition.replace("\n", " ")
        wotd = (first_word.decode('utf-8') + ": " + first_definition.decode('utf-8') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore')
        return wotd
    except:
        print "!ud wotd went wrong"
        return
def get_urbandictionary(self, e):
    """Fetch the top Urban Dictionary definition for e.input into e.output.

    Special cases: "wotd" delegates to the word-of-the-day scraper; an
    empty term pulls a random word. Returns None when the term has no
    definition or scraping fails.
    """
    searchterm = e.input
    url = "http://www.urbandictionary.com/define.php?term=%s" % urllib2.quote(searchterm)
    if searchterm=="wotd":
        e.output = get_urbandictionary_wotd()
        return e
    if searchterm== "":
        url = "http://www.urbandictionary.com/random.php"
    try:
        opener = urllib2.build_opener()
        # spoofed user agent; UD rejects the urllib default
        opener.addheaders = [('User-Agent',"Opera/9.10 (YourMom 8.0)")]
        pagetmp = opener.open(url)
        page = pagetmp.read()
        # random.php redirects, so capture the final URL for the output line
        url = pagetmp.geturl()
        opener.close()
        page = BeautifulSoup(page)
        first_definition= ""
        if page.find(id='not_defined_yet') != None:
            return None
        ## depending on the search results the first word may be contained directly under the <td class='word'> tag
        ## or it may be the text contents of a <a href> tag
        ## we first try to get it from inside a <td><a href>[word]</a></td> type structure
        ## if that fails, get the word under the initial <td> tag
        try:
            first_word = page.findAll('td',attrs={"class" : "word"})[0].contents[1].string
        except:
            first_word = page.findAll('td',attrs={"class" : "word"})[0].contents[0].string
        first_word = first_word.replace("\n","")
        #first_word = first_word.encode("utf-8", 'ignore')
        for content in page.findAll('div',attrs={"class" : "definition"})[0].contents:
            if content.string != None:
                first_definition += content.string
        #first_definition = first_definition.encode("utf-8", 'ignore')
        first_definition = tools.decode_htmlentities(first_definition).encode("utf-8", 'ignore')
        first_word = tools.decode_htmlentities(first_word).encode("utf-8", 'ignore')
        first_definition = first_definition.replace("\n", " ")
        first_definition = first_definition.replace("\r", " ")
        # trim so the final line fits in an IRC message
        first_definition = first_definition[0:392]
        first_definition = ((first_word + ": " + first_definition).decode("utf-8", 'ignore') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore')
        #print first_definition
        e.output = first_definition
        return e
    except:
        print "!ud %s went wrong" % searchterm
        return
def get_wiki(self, e, urlposted=False): # read the first paragraph of a wikipedia article searchterm = e.input if urlposted: url = searchterm else: if searchterm == "": url = "http://en.wikipedia.org/wiki/Special:Random" else: url = tools.google_url("site:wikipedia.org " + searchterm, "wikipedia.org/wiki") title = "" if url and url.find("wikipedia.org/wiki/File:") != -1: file_title = get_wiki_file_description(url) if file_title: e.output = file_title return e if url and url.find("wikipedia.org/wiki/") != -1: try: opener = urllib2.build_opener() opener.addheaders = [("User-Agent", "Opera/9.10 (YourMom 8.0)")] pagetmp = opener.open(url) page = pagetmp.read() url = pagetmp.geturl() opener.close() if url.find("#") != -1: anchor = url.split("#")[1] page = page[page.find('id="' + anchor) :] page = BeautifulSoup(page) tables = page.findAll("table") for table in tables: table.extract() page = page.findAll("p") if str(page[0])[0:9] == "<p><span ": page = unicode(page[1].extract()) else: page = unicode(page[0].extract()) title = tools.remove_html_tags(re.search("(?s)\<p\>(.*?)\<\/p\>", page).group(1)) title = title.encode("utf-8", "ignore") title = title.replace("<", "") rembracket = re.compile(r"\[.*?\]") title = rembracket.sub("", title) # title = re.sub("\&.*?\;", " ", title) title = title.replace("\n", " ") title = tools.decode_htmlentities(title.decode("utf-8", "ignore")).encode("utf-8", "ignore") title = title[0:420] if title.rfind(".") != -1: title = title[0 : title.rfind(".") + 1] if not urlposted: url = tools.shorten_url(url) title = (title.decode("utf-8", "ignore") + " [ %s ]" % url).encode("utf-8", "ignore") except Exception as inst: print "!wiki " + searchterm + " : " + str(inst) title = tools.remove_html_tags(re.search("\<p\>(.*?\.) ", str(page)).group(1)) e.output = title return e
def advocate_beer(self, e): query = e.input #get the name, rating and style of a beer from beeradvocate.com url = tools.google_url("site:beeradvocate.com " + query, "/beer/profile/[0-9]*/") #url = "http://beeradvocate.com/beer/profile/306/1212/" socket.setdefaulttimeout(30) try: beerpage = urllib2.urlopen(url).read() #.decode("ISO-8859-1") except: return None socket.setdefaulttimeout(10) titlestart = beerpage.find("<title>") + 7 titleend = beerpage.find(" - ", titlestart) beertitle = beerpage[titlestart:titleend] score_start_tag = '<span class="BAscore_big">' score_end_tag = 'Reviews</td>' start = beerpage.find(score_start_tag) + len(score_start_tag) score_line = beerpage[start:start + 50] find_start_tag = "</span>\n<br>" find_end_tag = "<br>" #print score_line grade = score_line[0:score_line.find(find_start_tag)] #print "\n" + grade grade_wording = score_line[score_line.find(find_start_tag) + len(find_start_tag):score_line. rfind(find_end_tag)] #print grade_wording find_start_tag = find_end_tag find_end_tag = "</td>" num_reviews = score_line[score_line.rfind(find_start_tag) + len(find_start_tag):score_line.find(find_end_tag)] #print num_reviews find_start_tag = "Style | ABV" style_line = beerpage[beerpage.find(find_start_tag ):beerpage.find(find_start_tag) + 120] find_start_tag = "><b>" find_end_tag = "</b></a> | " style = style_line[style_line.find(find_start_tag) + len(find_start_tag):style_line.find(find_end_tag)] find_start_tag = find_end_tag find_end_tag = "% <a href" abv = style_line[style_line.find(find_start_tag) + len(find_start_tag):style_line.find(find_end_tag) + 1] response_string = "Beer: %s - Grade: %s [%s, %s] Style: %s ABV: %s [ %s ]" % ( beertitle, grade, grade_wording, num_reviews, style, abv, tools.shorten_url(url)) e.output = response_string return e
def get_urbandictionary(self, e): searchterm = e.input url = "http://www.urbandictionary.com/define.php?term=%s" % urllib2.quote( searchterm) if searchterm == "wotd": e.output = get_urbandictionary_wotd() return e if searchterm == "": url = "http://www.urbandictionary.com/random.php" try: opener = urllib2.build_opener() opener.addheaders = [('User-Agent', "Opera/9.10 (YourMom 8.0)")] pagetmp = opener.open(url) page = pagetmp.read() url = pagetmp.geturl() opener.close() page = BeautifulSoup(page) first_definition = "" if page.find(id='not_defined_yet') != None: return None ## depending on the search results the first word may be contained directly under the <td class='word'> tag ## or it may be the text contents of a <a href> tag ## we first try to get it from inside a <td><a href>[word]</a></td> type structure ## if that fails, get the word under the initial <td> tag try: first_word = page.findAll('td', attrs={"class": "word"})[0].contents[1].string except: first_word = page.findAll('td', attrs={"class": "word"})[0].contents[0].string first_word = first_word.replace("\n", "") #first_word = first_word.encode("utf-8", 'ignore') for content in page.findAll('div', attrs={"class": "definition"})[0].contents: if content.string != None: first_definition += content.string #first_definition = first_definition.encode("utf-8", 'ignore') first_definition = tools.decode_htmlentities(first_definition).encode( "utf-8", 'ignore') first_word = tools.decode_htmlentities(first_word).encode( "utf-8", 'ignore') first_definition = first_definition.replace("\n", " ") first_definition = first_definition.replace("\r", " ") first_definition = first_definition[0:392] first_definition = ( (first_word + ": " + first_definition).decode("utf-8", 'ignore') + " [ %s ]" % tools.shorten_url(url)).encode('utf-8', 'ignore') #print first_definition e.output = first_definition return e except: print "!ud %s went wrong" % searchterm return