Example #1
import re

# Assumes the bot's `http` helper and a module-level `search_url`
# (the Snopes search endpoint) are available.
def snopes(text):
    """snopes <topic> -- Searches snopes for an urban legend about <topic>."""

    search_page = http.get_html(search_url, sp_q=text, sp_c="1")
    result_urls = search_page.xpath("//a[@target='_self']/@href")

    if not result_urls:
        return "no matching pages found"

    snopes_page = http.get_html(result_urls[0])
    snopes_text = snopes_page.text_content()

    claim_match = re.search(r"Claim: .*", snopes_text)
    if claim_match is None:
        return "Could not parse the Snopes page."
    claim = claim_match.group(0).strip()
    status = re.search(r"Status: .*", snopes_text)

    if status is not None:
        status = status.group(0).strip()
    else:  # newer pages use an all-caps rating keyword instead
        rating = re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED", snopes_text)
        if rating is None:
            return "Could not parse the Snopes page."
        status = "Status: {}.".format(rating.group(0).title())

    # Compress runs of whitespace (the explicit \xa0 is a Python 2
    # holdover; in Python 3, \s already matches non-breaking spaces).
    claim = re.sub(r"[\s\xa0]+", " ", claim)
    status = re.sub(r"[\s\xa0]+", " ", status)

    return "{} {} {}".format(claim, status, result_urls[0])
Example #2
import datetime

# Assumes the bot's `http` and `timesince` helper modules are available.
def pre(inp):
    """pre <query> -- Searches scene releases using orlydb.com."""

    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except http.HTTPError as e:
        return 'Unable to fetch results: {}'.format(e)

    results = h.xpath("//div[@id='releases']/div/span[@class='release']/..")

    if not results:
        return "No results found."

    result = results[0]

    date = result.xpath("span[@class='timestamp']/text()")[0]
    section = result.xpath("span[@class='section']//text()")[0]
    name = result.xpath("span[@class='release']/text()")[0]

    # parse date/time
    date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    date_string = date.strftime("%d %b %Y")
    since = timesince.timesince(date)

    size = result.xpath("span[@class='inforight']//text()")
    if size:
        size = ' - ' + size[0].split()[0]
    else:
        size = ''

    return '{} - {}{} - {} ({} ago)'.format(section, name, size, date_string, since)
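`timesince.timesince` above is a bot utility that renders a datetime as a human-readable age. If it is unavailable, a rough stand-in (hypothetical, far simpler than the real helper) might look like:

import datetime

def timesince_stub(then, now=None):
    """Very rough stand-in for the bot's timesince helper."""
    now = now or datetime.datetime.now()
    seconds = int((now - then).total_seconds())
    for unit, span in (("day", 86400), ("hour", 3600), ("minute", 60)):
        if seconds >= span:
            n = seconds // span
            return "{} {}{}".format(n, unit, "s" if n != 1 else "")
    return "moments"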
Example #3
# Assumes this runs as a regex-triggered hook where `match.group(1)` is the
# Reddit thread URL, and the bot's `http` helper is available. The XPath
# queries target old-style reddit.com markup.
def reddit_url(match):
    thread = http.get_html(match.group(1))

    title = thread.xpath('//title/text()')[0]
    upvotes = thread.xpath("//span[@class='upvotes']/span[@class='number']/text()")[0]
    downvotes = thread.xpath("//span[@class='downvotes']/span[@class='number']/text()")[0]
    author = thread.xpath("//div[@id='siteTable']//a[contains(@class,'author')]/text()")[0]
    timeago = thread.xpath("//div[@id='siteTable']//p[@class='tagline']/time/text()")[0]
    comments = thread.xpath("//div[@id='siteTable']//a[@class='comments']/text()")[0]

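    # \x02 toggles bold in IRC formatting.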
    return '\x02{}\x02 - posted by \x02{}\x02 {} ago - {} upvotes, {} downvotes - {}'.format(
        title, author, timeago, upvotes, downvotes, comments)
Example #4
import re

# Assumes the bot's `http` helper is available.
def nhlScores(text=" "):
    """nhl <team city> - gets the score or next scheduled game for the
    specified team. If no team is specified, all games are included."""
    response = http.get_html('http://scores.espn.go.com/nhl/bottomline/scores',
                             decode=False)
    game = ""
    score = response.text_content()
    # The feed is a percent-encoded query string with '&' between records.
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile(r"nhl_s_left\d+=(.*)")
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            game = game + match + "  "
    return game
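The four ESPN examples here (nhl, nba, mlb, and the generic scraper below) share one parsing idea: the bottomline endpoint returns a percent-encoded query string rather than HTML. A self-contained sketch of the decode-and-match step, using a made-up feed fragment (illustrative values only, not real feed data):

import re

# Hypothetical feed fragment; the real feed uses the same '&'-separated,
# percent-encoded key=value layout but different values.
feed = ("nhl_s_left1=Boston%203%20%20Montreal%202%20(FINAL)"
        "&nhl_s_left2=Dallas%20at%20Chicago%20(8:30%20PM%20ET)")

raw = feed.replace('%20', ' ').replace('^', '').replace('&', '\n')
pattern = re.compile(r"nhl_s_left\d+=(.*)")

for match in pattern.findall(raw):
    if "boston" in match.lower():
        print(match)  # -> Boston 3  Montreal 2 (FINAL)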
Example #5
# Assumes the bot's `http` and `formatting` helper modules are available.
def etymology(text, message):
    """<word> - Retrieves the etymology of the chosen word."""
    url = 'http://www.etymonline.com/search'
    try:
        params = {'q': text}
        h = http.get_html(url, params=params)
    except (http.HTTPError, http.URLError) as e:
        return "Error fetching etymology: {}".format(e)
    etym = h.xpath('//section')

    if not etym:
        return 'No etymology found for ' + text

    etym = etym[0].text_content()
    etym = ' '.join(etym.split())

    message(formatting.truncate_str(etym, 400))
Example #6
import re

# Generic version of the ESPN scrapers above. Assumes the bot's `http`
# helper and a `page_scores` paginator are available; `game` is a league
# slug such as "nhl" or "nba".
def scrape_scores(conn, chan, game, text):
    if not text:
        text = " "

    response = http.get_html(
        'http://scores.espn.go.com/{}/bottomline/scores'.format(game),
        decode=False)
    score = response.text_content()
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile(r"{}_s_left\d+=(.*)".format(game))
    scores = []
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            scores.append(match)

    return page_scores(conn, chan, scores)
Example #7
import re

# Assumes the bot's `http` helper, a module-level `search_pages` dict, and
# the `smart_truncate`/`two_lines` pagination helpers are available.
def nbaScores(chan, text=" "):
    """nba <team city> - gets the score or next scheduled game for the
    specified team. If no team is specified, all games are included."""
    search_pages[chan] = []
    search_pages[chan + "index"] = 0
    response = http.get_html('http://scores.espn.go.com/nba/bottomline/scores',
                             decode=False)
    game = ""
    score = response.text_content()
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile(r"nba_s_left\d+=(.*)")
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            game = game + match + " | "
    game = smart_truncate(game)
    game = game[:-2]  # trim the trailing separator
    game = two_lines(game, chan)
    if len(search_pages[chan]) > 1:
        return "{}(page {}/{}) .morescore".format(
            game, search_pages[chan + "index"] + 1, len(search_pages[chan]))
    return game
Example #8
import re

# Assumes the bot's `http` and `formatting` helpers, plus module-level
# `api_url` (a MediaWiki OpenSearch endpoint) and `mc_url` (the wiki's
# article base URL).
def mcwiki(text):
    """mcwiki <phrase> - gets the first paragraph of the Minecraft Wiki article on <phrase>"""

    try:
        j = http.get_json(api_url, search=text)
    except (http.HTTPError, http.URLError) as e:
        return "Error fetching search results: {}".format(e)
    except ValueError as e:
        return "Error reading search results: {}".format(e)

    if not j[1]:
        return "No results found."

    # Gamepedia uses sub-pages for different languages, so drop
    # items with a '/' in the name.
    items = [item for item in j[1] if "/" not in item]

    if items:
        article_name = items[0].replace(' ', '_').encode('utf8')
    else:
        # Every result contains a '/', so fall back to the first one.
        article_name = j[1][0].replace(' ', '_').encode('utf8')

    url = mc_url + http.quote(article_name, '')

    try:
        page = http.get_html(url)
    except (http.HTTPError, http.URLError) as e:
        return "Error fetching wiki page: {}".format(e)

    for p in page.xpath('//div[@class="mw-content-ltr"]/p'):
        if p.text_content():
            summary = " ".join(p.text_content().splitlines())
            summary = re.sub(r"\[\d+\]", "", summary)  # strip footnote markers
            summary = formatting.truncate_str(summary, 200)
            return "{} :: {}".format(summary, url)

    # this shouldn't happen
    return "Unknown Error."
Example #9
import re

# Assumes the bot's `http` helper, a module-level `search_pages` dict, and
# the `smart_truncate`/`two_lines` pagination helpers are available.
def mlbScores(chan, text=" "):
    """mlb <team city> - gets the score or next scheduled game for the
    specified team. If no team is specified, all games are included."""
    search_pages[chan] = []
    search_pages[chan + "index"] = 0
    response = http.get_html('http://scores.espn.go.com/mlb/bottomline/scores',
                             decode=False)
    game = ""
    score = response.text_content()
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile("mlb_s_left\d+=(.*)")
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            game = game + match + " | "
    game = smart_truncate(game)
    game = game[:-2]  # trim the trailing separator
    game = two_lines(game, chan)
    if len(search_pages[chan]) > 1:
        return "{}(page {}/{}) .morescore".format(
            game, search_pages[chan + "index"] + 1, len(search_pages[chan]))
    return game
Example #10
import re

# Assumes the bot's `http` helper is available.
def metacritic(text):
    """[all|movie|tv|album|x360|ps3|pc|gba|ds|3ds|wii|vita|wiiu|xone|ps4] <title> - gets rating for <title> from metacritic on the specified medium"""

    args = text.strip()

    game_platforms = ('x360', 'ps3', 'pc', 'gba', 'ds', '3ds', 'wii',
                      'vita', 'wiiu', 'xone', 'ps4')

    all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')

    try:
        plat, title = args.split(' ', 1)
        if plat not in all_platforms:
            # Unrecognized platform: raise ValueError so the except block
            # below handles it the same way as a failed .split().
            raise ValueError
    except ValueError:
        plat = 'all'
        title = args

    cat = 'game' if plat in game_platforms else plat

    title_safe = http.quote_plus(title)

    url = 'http://www.metacritic.com/search/{}/{}/results'.format(cat, title_safe)

    try:
        doc = http.get_html(url)
    except http.HTTPError:
        return 'error fetching results'

    # get the proper result element we want to pull data from
    result = None

    if not doc.find_class('query_results'):
        return 'No results found.'

    # if they specified an invalid search term, the input box will be empty
    if doc.get_element_by_id('search_term').value == '':
        return 'Invalid search term.'

    if plat not in game_platforms:
        # for [all] results, or non-game platforms, get the first result
        result = doc.find_class('result first_result')[0]

        # find the platform, if it exists
        result_type = result.find_class('result_type')
        if result_type:

            # if the result_type div has a platform div, get that one
            platform_div = result_type[0].find_class('platform')
            if platform_div:
                plat = platform_div[0].text_content().strip()
            else:
                # otherwise, use the result_type text_content
                plat = result_type[0].text_content().strip()

    else:
        # for games, we want to pull the first result with the correct
        # platform
        results = doc.find_class('result')
        for res in results:
            result_plat = res.find_class('platform')[0].text_content().strip()
            if result_plat == plat.upper():
                result = res
                break

    if not result:
        return 'No results found.'

    # get the name, release date, and score from the result
    product_title = result.find_class('product_title')[0]
    name = product_title.text_content()
    link = 'http://metacritic.com' + product_title.find('a').attrib['href']

    try:
        release = (result.find_class('release_date')[0]
                   .find_class('data')[0].text_content())

        # strip extra spaces out of the release date
        release = re.sub(r'\s{2,}', ' ', release)
    except IndexError:
        release = None

    try:
        score = result.find_class('metascore_w')[0].text_content()
    except IndexError:
        score = None

    return '[{}] {} - \x02{}/100\x02, {} - {}'.format(plat.upper(), name, score or 'no score',
                                                      'release: \x02%s\x02' % release if release else 'unreleased',
                                                      link)
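For a sense of the URL construction, here is a hypothetical invocation traced through the string handling above (values illustrative):

# Tracing metacritic("ps4 The Last of Us") through the function above:
plat, title = "ps4", "The Last of Us"   # args.split(' ', 1)
cat = "game"                            # ps4 is in game_platforms
title_safe = "The+Last+of+Us"           # http.quote_plus(title)
url = 'http://www.metacritic.com/search/{}/{}/results'.format(cat, title_safe)
# -> http://www.metacritic.com/search/game/The+Last+of+Us/results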