Example #1
def refresh_cache():
    print('[+] refreshing distrowatch cache')
    output = '[DistroWatch]'

    def parse_table(data):
        global data_limit
        distro_names = []

        for distro in iterable.limit(data_limit, data):
            distro_names.append(distro.text.strip())

        return ', '.join(distro_names)

    # most popular distros in the last 12, 6 and 1 months
    html = request.get('https://distrowatch.com/dwres.php?resource=popularity')
    soup = BeautifulSoup(html, 'lxml')
    tables = soup.select('td.NewsText tr td table')

    for table in tables:
        header = table.find('th', attrs={'class': 'Invert'})
        data = table.find_all('td', attrs={'class': 'phr2'})

        # skip table if it doesn't have distro info
        if header is None or not data:
            continue

        # skip this table if it's not wanted
        header = header.text.strip()
        if header not in allowed_datasets:
            continue

        output = output + ' \x02Popular\x02 (' + header.replace('Last ', '') + '): '
        output = output + parse_table(data) + '.'

    # trending distros in the past 12, 6 and 1 months
    html = request.get('https://distrowatch.com/dwres.php?resource=trending')
    soup = BeautifulSoup(html, 'lxml')
    tables = soup.select('table table table table.News')

    for table in tables:
        header = table.find('th', attrs={'class': 'Invert'})
        data = table.parent.find_all('td', attrs={'class': 'phr2'})

        if header is None or not data:
            continue

        # skip this table if it's not wanted
        header = header.text.strip()
        if header not in allowed_datasets:
            continue

        output = output + ' \x02Trending\x02 (' + header.replace('Trending ', '') + '): '
        output = output + parse_table(data) + '.'

    global cache
    cache = output
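
The `iterable.limit` helper used above (and again in later examples) comes from the bot's own utility package and isn't shown here. A minimal sketch of what such a helper might look like, assuming it simply caps iteration (an assumption, not the original implementation):

from itertools import islice

def limit(count, iterable):
    # yield at most `count` items from the given iterable (hypothetical helper)
    return islice(iterable, count)
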
Example #2
def fetch(start, dest):
    start = request.urlencode(start)
    dest = request.urlencode(dest)
    url = "http://www.travelmath.com/flying-distance/from/{}/to/{}".format(
        start, dest)
    html = request.get(url)
    return html
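
The `request.get` and `request.urlencode` wrappers that all of these examples rely on also aren't shown. A rough sketch of what such a module might wrap, assuming the `requests` library and Python 3's urllib underneath (hypothetical, not the original):

import requests
from urllib.parse import quote_plus

def get(url, params=None, headers=None):
    # fetch the URL and hand back the response body as text
    return requests.get(url, params=params, headers=headers, timeout=10).text

def urlencode(text):
    # make arbitrary user input safe to embed in a URL
    return quote_plus(text)
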
Example #3
def kernel(inp, reply=None):
    data = request.get("https://www.kernel.org/finger_banner")
    lines = data.split('\n')

    versions = []
    old_versions = []
    for line in lines:
        info = re.match(
            r'^The latest ([a-z0-9 \-\.]+) version of the Linux kernel is:\s*(.*)$',
            line)
        if info is None:
            continue

        name = info.group(1)
        version = info.group(2)

        if 'longterm' in name:
            old_versions.append(version)
        else:
            versions.append(name + ': ' + version)

    output = 'Linux kernel versions: ' + '; '.join(versions)

    if len(old_versions) > 0:
        output = output + '. Old longterm versions: ' + ', '.join(old_versions)

    return output
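
A quick check of the regex above against a line in the format kernel.org's finger banner is assumed to use (the sample line is illustrative, not fetched):

import re

sample = 'The latest stable version of the Linux kernel is:           6.6.2'
info = re.match(
    r'^The latest ([a-z0-9 \-\.]+) version of the Linux kernel is:\s*(.*)$',
    sample)
print(info.group(1), '->', info.group(2))  # stable -> 6.6.2
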
Example #4
def debt(inp):
    """debt -- returns the us national debt"""

    url = "https://commodity.com/debt-clock/us/"
    html = request.get(url)
    debt = parse(html)

    return "Current US Debt: \x02{}\x02".format(debt)
Example #5
def refresh_cache():
    print "[+] refreshing fmylife cache"
    html = request.get('https://www.fmylife.com/random')
    soup = BeautifulSoup(html, 'lxml')
    posts = soup.find_all('a', attrs={'class': 'article-link'})

    for post in posts:
        id = post['href'].split('_')[1].split('.')[0]
        text = post.text.strip()
        cache.append((id, text))
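
The module-level `cache` list the refresher fills isn't shown, nor is the command that reads it back. A minimal sketch of both, with the `fml` command name being hypothetical:

import random

cache = []

def fml(inp):
    # serve a random cached post, refilling the cache if it has run dry
    if not cache:
        refresh_cache()
    id, text = random.choice(cache)
    return u'(#{}) {}'.format(id, text)
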
Example #6
def amazon_url(match):
    id = match.group(2).strip()
    url = 'https://www.amazon.com/dp/' + id + '/'
    html = request.get(url)
    title, price = parse_product(html)

    if len(title) > 80:
        title = title[:80] + '...'

    return u'[Amazon] {} \x0303{}\x03 {}'.format(title, price, url)
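
`parse_product(html)` isn't shown. A rough sketch assuming typical Amazon product-page markup; the selectors are guesses and would likely need adjusting:

from bs4 import BeautifulSoup

def parse_product(html):
    soup = BeautifulSoup(html, 'lxml')
    # 'productTitle' and 'a-offscreen' are assumed selectors, not from the source
    title = soup.find('span', attrs={'id': 'productTitle'})
    price = soup.find('span', attrs={'class': 'a-offscreen'})
    title = title.text.strip() if title else 'unknown title'
    price = price.text.strip() if price else 'unknown price'
    return title, price
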
Example #7
def get_more_detail(api_path, gid):
    api_gid = 'gid_{}'.format(gid.replace('/', '_').replace('-', '_'))
    detail_linescore = '{}/{}/linescore.json'.format(api_path, api_gid)
    detail_eventlog = '{}/{}/eventLog.xml'.format(api_path, api_gid)

    try:
        linescore = get_json(detail_linescore)
    except Exception as e:
        return e

    if not isinstance(linescore, dict):
        return Exception('linescore is not an object')

    try:
        linescore = linescore['data']['game']
    except KeyError:
        return Exception('linescore structure is unexpected')

    # count
    balls = linescore.get('balls', 'unkn')
    strikes = linescore.get('strikes', 'unkn')
    outs = linescore.get('outs', 'unkn')

    runners_onbase = linescore.get('runner_on_base_status', 'unkn')

    pitcher = linescore.get('current_pitcher', dict()).get('last_name', 'unkn')
    batter = linescore.get('current_batter', dict()).get('last_name', 'unkn')

    # bonus
    latest_event = ''

    try:
        events_xml = BeautifulSoup(get(detail_eventlog))
        events = events_xml.find_all('event')

        maxval = -999
        for event in events:
            if int(event['number']) > maxval and event['description'] != '':
                maxval = int(event['number'])
                latest_event = event['description']
    except Exception as e:
        latest_event = e

    return {'balls': balls,
            'strikes': strikes,
            'outs': outs,
            'onbase': runners_onbase,
            'pitcher': pitcher,
            'batter': batter,
            'latest': latest_event}
Example #8
def get_title(url):
    html = request.get(url)
    soup = BeautifulSoup(html, 'lxml')

    if '#' in url:
        postid = url.split('#')[1]
        post = soup.find('div', {'id': postid})
    else:
        post = soup.find('div', {'class': 'opContainer'})

    comment = process_text(
        post.find('blockquote', {
            'class': 'postMessage'
        }).renderContents().strip())
    return u"{} - {}".format(url, comment)  #
Example #9
def refresh_cache():
    "gets a page of random bash.org quotes and puts them into a dictionary "
    print "[+] refreshing bash cache"
    html = request.get('http://bash.org/?random')
    soup = BeautifulSoup(html, 'lxml')
    quote_infos = soup.find_all('p', {'class': 'quote'})
    quotes = soup.find_all('p', {'class': 'qt'})

    for quote_info, quote_paragraph in zip(quote_infos, quotes):
        quote = quote_paragraph.text.replace('\n', ' ').replace('\r', ' |')
        id = quote_info.contents[0].text
        votes = quote_info.find('font').text
        cache.append((id, votes, quote))
Example #10
def get_bash_quote(inp):
    try:
        inp = request.urlencode(inp)
        html = request.get('http://bash.org/?' + inp)
        soup = BeautifulSoup(html, 'lxml')
        quote_info = soup.find('p', {'class': 'quote'})
        quote = soup.find('p', {
            'class': 'qt'
        }).text.replace('\n', ' ').replace('\r', ' |')

        id = quote_info.contents[0].text
        votes = quote_info.find('font').text
        return u'\x02{}\x02 ({} votes): {}'.format(id, votes, quote)
    except Exception:
        return "No quote found."
Example #11
def anus_real(inp, nick=None):
    if not inp:
        inp = nick

    inp = request.urlencode(inp)
    html = request.get('http://en.inkei.net/anus/' + inp)
    soup = BeautifulSoup(html, 'lxml')

    details = soup.find(id='elmDescCmmn')
    if details is None:
        return 'Anus: http://en.inkei.net/anus/' + inp

    details = formatting.compress_whitespace(details.text)

    details = re.sub('Anus of [a-zA-Z0-9]+ ', 'Anus: ', details)
    return u'{} - http://en.inkei.net/anus/{}'.format(details, inp)
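
`formatting.compress_whitespace` (imported bare as `compress_whitespace` in a later example) isn't shown either. A minimal sketch of what such a helper typically does, assumed rather than taken from the source:

import re

def compress_whitespace(text):
    # collapse runs of whitespace, including newlines, into single spaces
    return re.sub(r'\s+', ' ', text).strip()
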
Example #12
def query(query,
          useragent='python-duckduckgo ' + str(__version__),
          safesearch=False,
          html=False,
          meanings=True,
          **kwargs):
    """
    Query DuckDuckGo, returning a Results object.

    Here's a query that's unlikely to change:

    >>> result = query('1 + 1')
    >>> result.type
    'nothing'
    >>> result.answer.text
    '1 + 1 = 2'
    >>> result.answer.type
    'calc'

    Keyword arguments:
    useragent: UserAgent to use while querying. Default: "python-duckduckgo %d" (str)
    safesearch: True for on, False for off. Default: False (bool)
    html: True to allow HTML in output. Default: False (bool)
    meanings: True to include disambiguations in results (bool)
    Any other keyword arguments are passed directly to DuckDuckGo as URL params.
    """ % __version__

    safesearch = '1' if safesearch else '-1'
    html = '0' if html else '1'
    meanings = '0' if meanings else '1'
    params = {
        'q': query.encode('utf-8'),
        'o': 'json',
        'kp': safesearch,
        'no_redirect': '1',
        'no_html': html,
        'd': meanings,
    }
    params.update(kwargs)

    response = request.get('https://api.duckduckgo.com/',
                           params=params,
                           headers={'User-Agent': useragent})
    json = json_loads(response)
    return Results(json)
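
A hedged usage sketch, relying only on what the docstring above promises (a Results object exposing `type` and `answer`):

result = query('1 + 1', safesearch=True)
print(result.type)         # 'nothing' for a bare calculation
print(result.answer.text)  # '1 + 1 = 2'
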
Example #13
def amazon(inp):
    """amazon [query] -- Searches amazon for query"""
    if not inp:
        return "usage: amazon <search>"

    inp = request.urlencode(inp)
    html = request.get('https://www.amazon.com/s?k=' + inp)
    results = parse(html)

    if len(results) == 0:
        return 'No results found'

    title, price, url = results[0]

    if len(title) > 80:
        title = title[:80] + '...'

    # \x03 = color, 03 = green
    return u'[Amazon] {} \x0303{}\x03 {}'.format(title, price, url)
Example #14
def validate(inp):
    """validate <url> -- Runs url through the w3c markup validator."""

    if not inp.startswith('http'):
        inp = 'https://' + inp

    url = 'https://validator.w3.org/nu/?doc=' + request.urlencode(inp)
    html = request.get(url)
    soup = BeautifulSoup(html, 'lxml')
    results = soup.find('div', attrs={'id': 'results'})

    errors = len(results.find_all('li', attrs={'class': 'error'}))
    warns = len(results.find_all('li', attrs={'class': 'warning'}))
    info = len(results.find_all('li', attrs={'class': 'info'}))

    if errors == 0 and warns == 0 and info == 0:
        return "[w3c] Successfully validated with no errors"

    return "[w3c] Found {} errors, {} warnings and {} notices.".format(
        errors, warns, info)
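
A hedged usage sketch; the exact counts depend on whatever the validator reports for the page at the time:

print(validate('example.com'))
# e.g. "[w3c] Successfully validated with no errors"
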
Example #15
def scrape_text(url):
    html = request.get(url)
    soup = BeautifulSoup(html, 'lxml')
    title = soup.find('h1', attrs={'id': 'firstHeading'})
    body = soup.find('div', attrs={'id': 'mw-content-text'})

    if title:
        title = title.text.strip()

    if body is None:
        return "Error reading the article"

    output = []
    for paragraph in body.find_all('p'):
        text = paragraph.text.strip()
        if len(text) > 4:  # skip empty paragraphs
            output.append(text)

    output = ' '.join(output)

    return output, title
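
Since `firstHeading` and `mw-content-text` are MediaWiki ids, a Wikipedia article URL is a natural input; a hedged usage sketch:

text, title = scrape_text('https://en.wikipedia.org/wiki/Linux_kernel')
print(title)
print(text[:200] + '...')
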
Example #16
def wordoftheday(inp):
    html = request.get('https://www.merriam-webster.com/word-of-the-day')
    soup = BeautifulSoup(html, 'lxml')

    word = soup.find('div', attrs={
        'class': 'word-and-pronunciation'
    }).find('h1').text
    paragraphs = soup.find('div', attrs={
        'class': 'wod-definition-container'
    }).find_all('p')

    definitions = []

    for paragraph in iterable.limit(4, paragraphs):
        definitions.append(paragraph.text.strip())

    output = u"The word of the day is \x02{}\x02: {}".format(
        word, '; '.join(definitions))

    if len(output) > 320:
        output = output[:320] + '... More at https://www.merriam-webster.com/word-of-the-day'

    return output
Example #17
def koran(inp):
    "koran <chapter.verse> -- gets <chapter.verse> from the Koran. it can also search any text."

    url = 'https://quod.lib.umich.edu/cgi/k/koran/koran-idx?type=simple&q1=' + request.urlencode(inp)
    html = request.get(url)
    soup = BeautifulSoup(html, 'lxml')
    query = soup.find_all('li')

    if not query:
        return 'No results for ' + inp

    output = '[Koran] '
    lines = []

    for li in iterable.limit(4, query):
        lines.append(compress_whitespace(li.text))

    output = output + ' '.join(lines)

    if len(output) > 320:
        output = output[:320] + '...'

    return output
Example #18
def define(inp):
    "define <word> -- Fetches definition of <word>."

    html = request.get(dict_url + request.urlencode(inp))
    soup = BeautifulSoup(html, 'lxml')

    definitions = soup.find_all('dd')

    if len(definitions) == 0:
        return "Definition not found"

    output = 'Definition of "' + inp + '":'

    # used to number the many definitions
    i = 1

    for definition in definitions:
        if 'article' in definition['class']:
            text = formatting.compress_whitespace(definition.text.strip())
            output = output + ' \x02' + text + '\x02'
            i = 1

        elif 'entry' in definition['class']:
            definition = definition.find('div', attrs={'class': 'definition'})
            text = formatting.compress_whitespace(definition.text.strip())
            output = output + text.replace(u'\xb0', ' \x02{}.\x02 '.format(i))
            i = i + 1

        # there's 'synonyms' and 'examples' too

    # arbitrary length limit
    if len(output) > 360:
        output = output[:360] + '\x0f... More at https://en.wiktionary.org/wiki/' + inp

    return output
Example #19
def etymology(inp):
    "etymology <word> -- Retrieves the etymology of <word>."

    html = request.get(eth_url + request.urlencode(inp))
    soup = BeautifulSoup(html, 'lxml')
    # the page uses weird class names like "section.word__definatieon--81fc4ae"
    # if it breaks, change the selector to [class~="word_"]
    results = soup.select('div[class^="word"] section[class^="word__def"] > p')

    if len(results) == 0:
        return 'No etymology found for ' + inp

    output = u'Etymology of "' + inp + '":'
    i = 1

    for result in results:
        text = formatting.compress_whitespace(result.text.strip())
        output = output + u' \x02{}.\x02 {}'.format(i, text)
        i = i + 1

    if len(output) > 400:
        output = output[:400] + '\x0f... More at https://www.etymonline.com/word/' + inp

    return output
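
`eth_url` isn't defined in this excerpt. Both the `word__` CSS classes and the 'More at' link point at etymonline.com, so a search URL there is a reasonable assumption:

# assumed value, inferred from the etymonline.com classes and link above
eth_url = 'https://www.etymonline.com/search?q='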