Пример #1
0
def text(html):
    """Convert an HTML fragment to plain text (a deliberately naive pass).

    Matches of ``r_tag`` are deleted, each match of ``r_whitespace`` is
    replaced by a single space, and the stripped result is run through
    ``web.decode``. The approach was borrowed from etymology.py.
    """
    tagless = r_tag.sub("", html)
    single_spaced = r_whitespace.sub(" ", tagless)
    return web.decode(single_spaced.strip())
Пример #2
0
def tr(bot, trigger):
    """Translate the phrase captured by the trigger and reply with the result.

    The trigger's groups are (source language, target language, phrase);
    missing languages default to ``'auto'`` and ``'en'``. Non-admins are
    limited to phrases of at most 350 characters.
    """
    in_lang, out_lang, phrase = trigger.groups()

    too_long = len(phrase) > 350
    if too_long and not trigger.admin:
        return bot.reply('Phrase must be under 350 characters.')
    if not phrase.strip():
        return bot.reply('You need to specify a string for me to translate!')

    source = in_lang if in_lang else 'auto'
    target = out_lang if out_lang else 'en'

    if source == target:
        # Identical source and target: nothing is sent to the translator.
        bot.reply('Language guessing failed, so try suggesting one!')
        return None

    translated, source = translate(phrase, source, target)
    if sys.version_info.major < 3 and isinstance(translated, str):
        # Python 2 byte string -> unicode
        translated = translated.decode('utf-8')

    if translated:
        # web.decode resolves HTML entities such as &#39;
        answer = '"%s" (%s to %s, translate.google.com)' % (
            web.decode(translated), source, target)
    else:
        answer = 'The %s to %s translation failed, are you sure you specified valid language abbreviations?' % (
            source, target)
    bot.reply(answer)
Пример #3
0
def find_title(url, verify=True):
    """Fetch *url* and return the text of its ``<title>`` element, or None.

    The body is streamed in 512-byte chunks; reading stops once a closing
    title tag has been seen or more than ``max_bytes`` bytes were collected.
    The collected bytes are decoded as UTF-8, ignoring undecodable bytes.
    *verify* is forwarded to ``requests.get``.
    """
    response = requests.get(url, stream=True, verify=verify,
                            headers=default_headers)
    buffered = bytearray()
    try:
        for chunk in response.iter_content(chunk_size=512):
            buffered.extend(chunk)
            if b'</title>' in buffered or len(buffered) > max_bytes:
                break
        markup = bytes(buffered).decode('utf-8', errors='ignore')
    finally:
        # The body may be only partially read, so close explicitly.
        response.close()

    # Inherited cleanup using the module-level compiled regexes.
    markup = quoted_title.sub('', title_tag_data.sub(r'<\1title>', markup))

    open_at = markup.find('<title>')
    close_at = markup.find('</title>')
    if -1 in (open_at, close_at):
        return None

    raw_title = web.decode(markup[open_at + len('<title>'):close_at])
    clipped = raw_title.strip()[:200]
    squeezed = ' '.join(clipped.split())  # collapse whitespace runs
    cleaned = re_dcc.sub('', squeezed)
    return cleaned if cleaned else None
Пример #4
0
def find_title(url, verify=True):
    """Download the start of *url* and extract its page title.

    Returns the cleaned title string, or None when no complete
    ``<title>...</title>`` pair was found or the title is empty. Streaming
    stops after the closing title tag or once ``max_bytes`` is exceeded.
    """
    response = requests.get(
        url, headers=default_headers, verify=verify, stream=True
    )
    try:
        raw = b""
        for piece in response.iter_content(chunk_size=512):
            raw = raw + piece
            if len(raw) > max_bytes or b"</title>" in raw:
                break
        page = raw.decode("utf-8", errors="ignore")
    finally:
        # Not all data has necessarily been read; release the connection.
        response.close()

    # Legacy normalisation steps kept from the original implementation.
    page = title_tag_data.sub(r"<\1title>", page)
    page = quoted_title.sub("", page)

    first = page.find("<title>")
    last = page.find("</title>")
    if first == -1:
        return None
    if last == -1:
        return None

    inner = page[first + 7 : last]
    title = web.decode(inner).strip()[:200]
    title = re_dcc.sub("", " ".join(title.split()))
    if not title:
        return None
    return title
Пример #5
0
def find_title(url=None, content=None):
    """Return the page title for a URL or for already-fetched content.

    Exactly one of *url* and *content* must be supplied. Passing *content*
    lets a caller that has already downloaded the page avoid a duplicate
    request.

    Returns the cleaned title, or None when the data cannot be decoded, the
    fetched page is empty, no ``<title>...</title>`` pair is present, or the
    title is empty after cleanup.

    Raises:
        ValueError: if neither or both of *url* and *content* are given.
    """
    if (not content and not url) or (content and url):
        raise ValueError("url *or* content needs to be provided to find_title")
    if url:
        try:
            # The response headers are not needed here.
            content, _ = web.get(url, return_headers=True, limit_bytes=max_bytes)
        except UnicodeDecodeError:
            return None  # Fail silently when data can't be decoded
        if not content:
            # An empty response cannot contain a title. This replaces an
            # ``assert`` that crashed here (and vanished under ``python -O``).
            return None

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r"<\1title>", content)
    content = quoted_title.sub("", content)

    start = content.find("<title>")
    end = content.find("</title>")
    if start == -1 or end == -1:
        return None
    title = web.decode(content[start + 7 : end])
    title = title.strip()[:200]

    title = " ".join(title.split())  # cleanly remove multiple spaces

    # More cryptic regex substitutions. This one looks to be myano's invention.
    title = re_dcc.sub("", title)

    return title or None
Пример #6
0
def find_title(url=None, content=None):
    """Return the page title for a URL or for already-fetched content.

    Exactly one of *url* and *content* must be supplied; *content* exists so
    callers that already hold the page can skip a second request.

    Returns the cleaned title, or None when the data cannot be decoded, the
    fetched page is empty, no ``<title>...</title>`` pair is present, or the
    title is empty after cleanup.

    Raises:
        ValueError: if neither or both of *url* and *content* are given.
    """
    if (not content and not url) or (content and url):
        raise ValueError('url *or* content needs to be provided to find_title')
    if url:
        try:
            # Only the body is used; the headers are discarded.
            content, _ = web.get(url,
                                 return_headers=True,
                                 limit_bytes=max_bytes)
        except UnicodeDecodeError:
            return None  # Fail silently when data can't be decoded
        if not content:
            # Previously ``assert content``: an empty page raised
            # AssertionError (or slipped through under ``python -O``).
            return None

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r'<\1title>', content)
    content = quoted_title.sub('', content)

    start = content.find('<title>')
    end = content.find('</title>')
    if start == -1 or end == -1:
        return None
    title = web.decode(content[start + 7:end])
    title = title.strip()[:200]

    title = ' '.join(title.split())  # cleanly remove multiple spaces

    # More cryptic regex substitutions. This one looks to be myano's invention.
    title = re_dcc.sub('', title)

    return title or None
Пример #7
0
def find_title(url):
    """Fetch *url* via ``web.get`` and return its page title, or None.

    At most ``max_bytes`` are requested. None is returned when the data
    cannot be decoded, when no ``<title>...</title>`` pair is found, or when
    the title is empty after cleanup.
    """
    try:
        page, _ = web.get(url, return_headers=True, limit_bytes=max_bytes)
    except UnicodeDecodeError:
        return None  # undecodable data: give up quietly

    # Inherited cleanup using the module-level compiled regexes.
    page = quoted_title.sub('', title_tag_data.sub(r'<\1title>', page))

    open_at = page.find('<title>')
    close_at = page.find('</title>')
    if -1 in (open_at, close_at):
        return None

    raw_title = web.decode(page[open_at + len('<title>'):close_at])
    clipped = raw_title.strip()[:200]
    squeezed = ' '.join(clipped.split())  # collapse whitespace runs
    cleaned = re_dcc.sub('', squeezed)
    return cleaned if cleaned else None
Пример #8
0
def find_title(url):
    """Return the title for the given URL, or None if none can be found.

    The response is streamed in 512-byte chunks and reading stops once a
    closing title tag has been seen or more than ``max_bytes`` characters
    were collected. None is returned when the data cannot be decoded, when
    no ``<title>...</title>`` pair is present, or when the title is empty
    after cleanup.
    """
    import codecs

    response = requests.get(url, stream=True)
    # Used only for chunks requests hands back undecoded. An incremental
    # decoder copes with multi-byte characters split across chunk borders.
    fallback_decoder = codecs.getincrementaldecoder('utf-8')(errors='ignore')
    try:
        content = ''
        for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
            if isinstance(chunk, bytes):
                # With no known response encoding, requests yields raw bytes
                # even with decode_unicode=True. The previous ``str(chunk)``
                # then produced the "b'...'" repr on Python 3.
                chunk = fallback_decoder.decode(chunk)
            content += chunk
            if '</title>' in content or len(content) > max_bytes:
                break
    except UnicodeDecodeError:
        return None  # Fail silently when data can't be decoded
    finally:
        # need to close the connection because we have not read all the data
        response.close()

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r'<\1title>', content)
    content = quoted_title.sub('', content)

    start = content.find('<title>')
    end = content.find('</title>')
    if start == -1 or end == -1:
        return None
    title = web.decode(content[start + 7:end])
    title = title.strip()[:200]

    title = ' '.join(title.split())  # cleanly remove multiple spaces

    # More cryptic regex substitutions. This one looks to be myano's invention.
    title = re_dcc.sub('', title)

    return title or None
Пример #9
0
def find_title(url):
    """Look up the ``<title>`` of the page at *url*.

    The page is retrieved with ``web.get`` (limited to ``max_bytes``). The
    result is the cleaned title string, or None if decoding fails, the title
    tags are missing, or nothing is left after cleanup.
    """
    try:
        fetched = web.get(url, return_headers=True, limit_bytes=max_bytes)
    except UnicodeDecodeError:
        return None  # silently skip pages that cannot be decoded
    body, _ = fetched

    # Legacy normalisation steps kept from the original implementation.
    body = title_tag_data.sub(r'<\1title>', body)
    body = quoted_title.sub('', body)

    first = body.find('<title>')
    last = body.find('</title>')
    if first == -1:
        return None
    if last == -1:
        return None

    title = web.decode(body[first + 7:last]).strip()[:200]
    title = re_dcc.sub('', ' '.join(title.split()))
    if not title:
        return None
    return title
Пример #10
0
def text(html):
    """Strip markup from *html* and return decoded plain text.

    A simple three-step pass (idea taken from etymology.py): remove
    ``r_tag`` matches, turn ``r_whitespace`` matches into single spaces, then
    strip the ends and apply ``web.decode``.
    """
    no_markup = r_tag.sub('', html)
    normalised = r_whitespace.sub(' ', no_markup).strip()
    return web.decode(normalised)
Пример #11
0
def tr(bot, trigger):
    """Reply with a translation of the phrase matched by *trigger*.

    Language hints come from the first two trigger groups and fall back to
    ``'auto'`` (source) and ``'en'`` (target). Phrases longer than 350
    characters are rejected unless the sender is an admin.
    """
    in_lang, out_lang, phrase = trigger.groups()

    if len(phrase) > 350:
        if not trigger.admin:
            return bot.reply('Phrase must be under 350 characters.')

    if phrase.strip() == '':
        return bot.reply('You need to specify a string for me to translate!')

    if not in_lang:
        in_lang = 'auto'
    if not out_lang:
        out_lang = 'en'

    if in_lang == out_lang:
        bot.reply('Language guessing failed, so try suggesting one!')
        return None

    result, in_lang = translate(phrase, in_lang, out_lang)
    if sys.version_info.major < 3 and isinstance(result, str):
        result = result.decode('utf-8')  # Python 2 only

    if not result:
        reply = 'The %s to %s translation failed, are you sure you specified valid language abbreviations?' % (in_lang, out_lang)
    else:
        decoded = web.decode(result)  # resolves entities such as &#39;
        reply = '"%s" (%s to %s, translate.google.com)' % (decoded, in_lang, out_lang)
    bot.reply(reply)
Пример #12
0
def fetch_character(query):
    """Look up a character through the API and return a one-line summary.

    The request URL is ``api + cFilter % query``. The return value is always
    something to show to the user: either
    ``"<name> - Description: <description>"`` or a short error message. If
    the response is not valid JSON, the raw response content is returned.
    """
    if not query:
        return "No search query provided."

    try:
        character = requests.get(api + cFilter % query, timeout=(10.0, 4.0))
    except requests.exceptions.ConnectTimeout:
        return "Connection timed out."
    except requests.exceptions.ConnectionError:
        return "Could not connect to server."
    except requests.exceptions.ReadTimeout:
        return "Server took too long to reply."

    try:
        character.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # Exceptions have no ``.message`` attribute on Python 3; str() works
        # on both major versions.
        return "HTTP error: " + str(e)

    try:
        data = character.json()
    except ValueError:
        return character.content

    try:
        entry = data['data'][0]
    except IndexError:
        return "No results found."

    attributes = entry['attributes']
    name = attributes.get('name')
    # The description may be missing or null; treat that as empty text
    # instead of failing on ``None.replace``.
    raw_description = attributes.get('description') or ''
    flattened = raw_description.replace('<br/>', ' ').replace('<br>', ' ')
    description = web.decode(bleach.clean(flattened, strip=True))

    return "{name} - Description: {description}".format(
        name=name, description=description)
Пример #13
0
def find_title(url, verify=True):
    """Return the title for the given URL, or None if none can be found.

    The body is streamed in 512-byte chunks; reading stops once a closing
    title tag has been seen or more than ``max_bytes`` bytes were collected.
    The last ``<title>`` / ``</title>`` occurrences in the collected data
    delimit the title. *verify* is forwarded to ``requests.get``.

    Returns None on a connection error, when the title tags are missing, or
    when the title is empty after cleanup.
    """
    try:
        response = requests.get(url, stream=True, verify=verify,
                                headers=default_headers)
    except requests.exceptions.ConnectionError:
        return None

    try:
        content = b''
        for chunk in response.iter_content(chunk_size=512):
            content += chunk
            if b'</title>' in content or len(content) > max_bytes:
                break
    except requests.exceptions.ConnectionError:
        return None
    finally:
        # Always release the connection: the body is usually only partially
        # read, and an error while streaming must not leak the response.
        response.close()
    content = content.decode('utf-8', errors='ignore')

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r'<\1title>', content)
    content = quoted_title.sub('', content)

    start = content.rfind('<title>')
    end = content.rfind('</title>')
    if start == -1 or end == -1:
        return None
    title = web.decode(content[start + 7:end])
    title = title.strip()[:200]

    title = ' '.join(title.split())  # cleanly remove multiple spaces

    # More cryptic regex substitutions. This one looks to be myano's invention.
    title = re_dcc.sub('', title)

    return title or None
Пример #14
0
def duck_search(query):
    """Return the first DuckDuckGo HTML result captured by ``r_duck``.

    Exclamation marks are removed from *query* and the rest is passed
    through ``web.quote`` before being placed in the search URL. Returns
    None when ``r_duck`` finds nothing in the page.
    """
    cleaned = web.quote(query.replace('!', ''))
    page = web.get('http://duckduckgo.com/html/?q=%s&kl=uk-en' % cleaned)
    found = r_duck.search(page)
    if found is None:
        return None
    return web.decode(found.group(1))
Пример #15
0
def text(html):
    """Turn an HTML fragment into a compact plain-text string.

    Tags matched by ``r_tag`` are removed, newlines become spaces, carriage
    returns are dropped, the "(intransitive"/"(transitive" markers are
    abbreviated, and the result is passed through ``web.decode``.
    """
    replacements = (
        ('\n', ' '),
        ('\r', ''),
        ('(intransitive', '(intr.'),
        ('(transitive', '(trans.'),
    )
    plain = r_tag.sub('', html).strip()
    for old, new in replacements:
        plain = plain.replace(old, new)
    return web.decode(plain)
Пример #16
0
def text(html):
    """Turn a definition's HTML into a compact plain-text string.

    ``r_sup`` matches (superscript references) are removed first, then the
    tags matched by ``r_tag``. Newlines become spaces, carriage returns are
    dropped, the "(intransitive"/"(transitive" markers are abbreviated, and
    the result is passed through ``web.decode``.
    """
    without_refs = r_sup.sub('', html)
    plain = r_tag.sub('', without_refs).strip()
    for old, new in (('\n', ' '),
                     ('\r', ''),
                     ('(intransitive', '(intr.'),
                     ('(transitive', '(trans.')):
        plain = plain.replace(old, new)
    return web.decode(plain)
Пример #17
0
def text(html):
    """Reduce definition HTML to decoded plain text.

    Steps, in order: remove ``r_sup`` matches (reference superscripts),
    remove ``r_tag`` matches and strip, flatten line breaks, shorten the
    "(intransitive"/"(transitive" markers, then apply ``web.decode``.
    """
    cleaned = r_tag.sub('', r_sup.sub('', html)).strip()
    cleaned = (
        cleaned
        .replace('\n', ' ')
        .replace('\r', '')
        .replace('(intransitive', '(intr.')
        .replace('(transitive', '(trans.')
    )
    return web.decode(cleaned)
Пример #18
0
def duck_search(query):
    """Return the first DuckDuckGo HTML result captured by ``r_duck``.

    Exclamation marks are removed from *query* before it is placed in the
    search URL. If the page contains the ``web-result"`` marker, only the
    text between its first and second occurrence is searched (per the
    original comment, this filters out the ads at the top of the page).
    Returns None when nothing matches.
    """
    marker = 'web-result"'
    page = web.get(
        'http://duckduckgo.com/html/?q=%s&kl=uk-en' % query.replace('!', ''))
    if marker in page:
        page = page.split(marker)[1]
    found = r_duck.search(page)
    if found is None:
        return None
    return web.decode(found.group(1))
Пример #19
0
def duck_search(query):
    """Search DuckDuckGo's HTML endpoint and return the first result.

    The query has its exclamation marks removed. When the ``web-result"``
    marker occurs in the page, the search is restricted to the segment
    following its first occurrence (intended to skip the ads at the top).
    The first ``r_duck`` capture is returned via ``web.decode``, or None if
    there is no match.
    """
    term = query.replace('!', '')
    html = web.get('http://duckduckgo.com/html/?q=%s&kl=uk-en' % term)

    _, marker_found, after_marker = html.partition('web-result"')
    if marker_found:
        # Same as html.split(marker)[1]: up to the next marker, if any.
        html = after_marker.partition('web-result"')[0]

    hit = r_duck.search(html)
    return web.decode(hit.group(1)) if hit else None
Пример #20
0
def duck_search(query):
    """Return the first DuckDuckGo HTML result captured by ``r_duck``.

    Exclamation marks are removed from *query*. The request is sent with a
    desktop-browser User-Agent header. If the page contains ``web-result``,
    only the text between its first and second occurrence is searched (per
    the original comment, to skip the ads at the top). The captured value is
    passed through ``unquote`` and ``web.decode``. Returns None when nothing
    matches.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    search_url = 'https://duckduckgo.com/html/?q=%s&kl=us-en' % query.replace('!', '')
    page = web.get(search_url, headers={'User-Agent': user_agent})

    marker = 'web-result'
    if marker in page:
        page = page.split(marker)[1]

    found = r_duck.search(page)
    if found is None:
        return None
    return web.decode(unquote(found.group(1)))
Пример #21
0
def duck_search(query):
    """Search DuckDuckGo's HTML endpoint and return the first result.

    The query (minus exclamation marks) is fetched with a desktop-browser
    User-Agent. When ``web-result`` occurs in the page, matching is
    restricted to the segment after its first occurrence (intended to skip
    the ads at the top). The first ``r_duck`` capture is unquoted and
    decoded; None is returned if there is no match.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    }
    term = query.replace('!', '')
    html = web.get('https://duckduckgo.com/html/?q=%s&kl=us-en' % term,
                   headers=request_headers)

    _, marker_found, after_marker = html.partition('web-result')
    if marker_found:
        # Same as html.split(marker)[1]: up to the next marker, if any.
        html = after_marker.partition('web-result')[0]

    hit = r_duck.search(html)
    if not hit:
        return None
    target = unquote(hit.group(1))
    return web.decode(target)
Пример #22
0
def duck_search(query):
    """Return every DuckDuckGo HTML result matched by ``r_duck`` as a list.

    Exclamation marks are removed from *query* before the request. Each
    match is passed through ``web.decode``. An earlier variant (previously
    kept here as commented-out code) cut the page at the ``web-result"``
    marker and returned only the first match.
    """
    term = query.replace("!", "")
    page = web.get("http://duckduckgo.com/html/?q=%s&kl=uk-en" % term)
    hits = r_duck.findall(page)
    return [web.decode(hit) for hit in hits]
Пример #23
0
def duck_search(query):
    """Collect all DuckDuckGo HTML results found by ``r_duck``.

    The query is stripped of exclamation marks, the result page is fetched
    with ``web.get``, and every ``r_duck`` match is decoded with
    ``web.decode``. Unlike the earlier single-result variant (formerly left
    here as commented-out code), the page is not trimmed at the
    ``web-result"`` marker and a list is returned.
    """
    search_url = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % query.replace('!', '')
    urls = []
    for raw in r_duck.findall(web.get(search_url)):
        urls.append(web.decode(raw))
    return urls
Пример #24
0
Файл: tld.py Проект: nsnw/sopel
def gettld(bot, trigger):
    """Show information about the given Top Level Domain.

    The TLD is taken from ``trigger.group(2)``; one leading dot is ignored.
    The page at the module-level ``uri`` is downloaded and scanned with up
    to three table-row patterns. The first pattern that matches determines
    the reply. If none matches, a "No matches found" message is sent.
    """
    tld = trigger.group(2)
    if not tld:
        bot.reply("You must provide a top-level domain to search.")
        return  # Stop before downloading anything if no tld argument is provided
    if tld[0] == '.':
        tld = tld[1:]
    # The TLD comes straight from the user: escape it so regex metacharacters
    # are matched literally instead of changing (or breaking) the patterns.
    tld_pattern = re.escape(tld)

    page = requests.get(uri).text

    search = r'(?i)<td><a href="\S+" title="\S+">\.{0}</a></td>\n(<td><a href=".*</a></td>\n)?<td>([A-Za-z0-9].*?)</td>\n<td>(.*)</td>\n<td[^>]*>(.*?)</td>\n<td[^>]*>(.*?)</td>\n'
    matches = re.compile(search.format(tld_pattern)).findall(page)
    if not matches:
        search = r'(?i)<td><a href="\S+" title="(\S+)">\.{0}</a></td>\n<td><a href=".*">(.*)</a></td>\n<td>([A-Za-z0-9].*?)</td>\n<td[^>]*>(.*?)</td>\n<td[^>]*>(.*?)</td>\n'
        matches = re.compile(search.format(tld_pattern)).findall(page)

    if matches:
        # Only the first matching row is used; strip markup from each field.
        fields = [r_tag.sub("", field) for field in matches[0]]
        desc = fields[2]
        if len(desc) > 400:
            desc = desc[:400] + "..."
        reply = "%s -- %s. IDN: %s, DNSSEC: %s" % (
            fields[1], desc, fields[3], fields[4]
        )
    else:
        search = r'<td><a href="\S+" title="\S+">.{0}</a></td>\n<td><span class="flagicon"><img.*?\">(.*?)</a></td>\n<td[^>]*>(.*?)</td>\n<td[^>]*>(.*?)</td>\n<td[^>]*>(.*?)</td>\n<td[^>]*>(.*?)</td>\n<td[^>]*>(.*?)</td>\n'
        matches = re.compile(search.format(tld_pattern)).findall(page)
        if matches:
            keys = ("country", "expl", "notes", "idn", "dnssec", "sld")
            dict_val = dict(zip(keys, matches[0]))
            for key in keys:
                # "&#160;" (a non-breaking space entity) marks an empty cell.
                if dict_val[key] == "&#160;":
                    dict_val[key] = "N/A"
                dict_val[key] = r_tag.sub('', dict_val[key])
            if len(dict_val["notes"]) > 400:
                dict_val["notes"] = dict_val["notes"][:400] + "..."
            reply = "%s (%s, %s). IDN: %s, DNSSEC: %s, SLD: %s" % (dict_val["country"], dict_val["expl"], dict_val["notes"], dict_val["idn"], dict_val["dnssec"], dict_val["sld"])
        else:
            # NOTE(review): ``unicode`` is not a Python 3 builtin; presumably
            # the surrounding module defines it - confirm.
            reply = "No matches found for TLD: {0}".format(unicode(tld))
    # Final touches + output
    reply = web.decode(reply)
    bot.reply(reply)
Пример #25
0
def find_title(url):
    """Return the title for the given URL, or None if none can be found.

    The page is parsed with BeautifulSoup first. If it has both an
    ``og:title`` meta tag with a ``content`` attribute and a ``<title>``
    element, the longer of the two (stripped) is returned. Otherwise the
    body is scanned in chunks for a ``<title>...</title>`` pair.
    """
    response = requests.get(url,
                            headers={'User-Agent': 'Sopel IRC Syrup'},
                            stream=True,
                            verify=True)
    bs = BeautifulSoup(response.content, "html.parser")
    og_tag = bs.find("meta", property="og:title")
    if og_tag:
        # ``.get`` instead of indexing: an og:title meta tag without a
        # ``content`` attribute used to raise KeyError here.
        ogtitle = og_tag.get('content')
        if ogtitle is not None and bs.title:
            ttitle = bs.title.text
            if len(ttitle) > len(ogtitle):
                return ttitle.strip()
            else:
                return ogtitle.strip()
    try:
        content = ''
        for byte in response.iter_content(chunk_size=512, decode_unicode=True):
            if not isinstance(byte, bytes):
                content += byte
            else:
                # An undecoded chunk: stop reading and use what we have.
                break
            if '</title>' in content or len(content) > max_bytes:
                break
    except UnicodeDecodeError:
        return None  # Fail silently when data can't be decoded
    finally:
        # need to close the connection because we have not read all the data
        response.close()

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r'<\1title>', content)
    content = quoted_title.sub('', content)

    start = content.find('<title>')
    end = content.find('</title>')
    if start == -1 or end == -1:
        return None
    title = web.decode(content[start + 7:end])
    title = title.strip()[:200]

    title = ' '.join(title.split())  # cleanly remove multiple spaces

    # More cryptic regex substitutions. This one looks to be myano's invention.
    title = re_dcc.sub('', title)
    title = title.replace("[apos]", "'")
    return title or None
Пример #26
0
def tr2(bot, trigger):
    """Translate a phrase, honouring up to two optional ``:lang`` prefixes.

    The command text is ``trigger.group(2)``. Leading words of the form
    ``:xx`` (a colon followed by 2-8 letters) set the source and then the
    target language, which default to ``'auto'`` and ``'en'``. Non-admins
    are limited to phrases of at most 350 characters.
    """
    command = trigger.group(2)
    if not command:
        return bot.reply('You did not give me anything to translate')

    def is_lang_prefix(token):
        return token.startswith(':') and 2 < len(token) < 10 and token[1:].isalpha()

    langs = ['auto', 'en']
    for slot in (0, 1):
        head, space, rest = command.partition(' ')
        if not space:
            break
        if is_lang_prefix(head):
            langs[slot] = head[1:]
            command = rest
    phrase = command

    overlong = len(phrase) > 350
    if overlong and not trigger.admin:
        return bot.reply('Phrase must be under 350 characters.')
    if not phrase.strip():
        return bot.reply('You need to specify a string for me to translate!')

    src, dest = langs
    if src == dest:
        bot.reply('Language guessing failed, so try suggesting one!')
        return None

    msg, src = translate(phrase, src, dest,
                         verify_ssl=bot.config.core.verify_ssl)
    if not src:
        return bot.say(
            "Translation failed, probably because of a rate-limit.")
    if sys.version_info.major < 3 and isinstance(msg, str):
        msg = msg.decode('utf-8')  # Python 2 byte string -> unicode

    if msg:
        # web.decode resolves HTML entities such as &#39;
        answer = '"%s" (%s to %s, translate.google.com)' % (
            web.decode(msg), src, dest)
    else:
        answer = 'The %s to %s translation failed, are you sure you specified valid language abbreviations?' % (
            src, dest)
    bot.reply(answer)
Пример #27
0
def duck_search(query):
    """Return the first DuckDuckGo HTML result captured by ``r_duck``.

    Exclamation marks are removed from *query*, which is sent as the ``q``
    parameter (with ``kl=us-en``) using a desktop-browser User-Agent. If the
    page contains ``web-result``, only the text between its first and second
    occurrence is searched (per the original comment, to skip the ads at the
    top). The captured value is passed through ``unquote`` and
    ``web.decode``. Returns None when nothing matches.
    """
    response = requests.get(
        'https://duckduckgo.com/html/',
        {'kl': 'us-en', 'q': query.replace('!', '')},
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
        },
    )
    page = response.text

    marker = 'web-result'
    if marker in page:
        page = page.split(marker)[1]

    found = r_duck.search(page)
    if found is None:
        return None
    return web.decode(unquote(found.group(1)))
Пример #28
0
def duck_search(query):
    """Search DuckDuckGo's HTML endpoint and return the first result.

    The query (minus exclamation marks) is passed as a request parameter
    together with ``kl=us-en`` and a desktop-browser User-Agent header.
    When ``web-result`` occurs in the page, matching is restricted to the
    segment after its first occurrence (intended to skip the ads at the
    top). The first ``r_duck`` capture is unquoted and decoded; None is
    returned if there is no match.
    """
    term = query.replace('!', '')
    endpoint = 'https://duckduckgo.com/html/'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    }
    html = requests.get(
        endpoint, {'kl': 'us-en', 'q': term}, headers=request_headers
    ).text

    _, marker_found, after_marker = html.partition('web-result')
    if marker_found:
        # Same as html.split(marker)[1]: up to the next marker, if any.
        html = after_marker.partition('web-result')[0]

    hit = r_duck.search(html)
    if not hit:
        return None
    target = unquote(hit.group(1))
    return web.decode(target)
Пример #29
0
def tr2(bot, trigger):
    """Reply with a translation of the command text.

    ``trigger.group(2)`` may start with one or two ``:lang`` words (a colon
    plus 2-8 letters). The first sets the source language and the second the
    target; the defaults are ``'auto'`` and ``'en'``. Phrases longer than
    350 characters are rejected for non-admins.
    """
    command = trigger.group(2)

    if not command:
        return bot.reply('You did not give me anything to translate')

    def looks_like_lang(word):
        if not word.startswith(':'):
            return False
        if not 2 < len(word) < 10:
            return False
        return word[1:].isalpha()

    source, target = 'auto', 'en'
    for position in (0, 1):
        if ' ' not in command:
            break
        first_word, remainder = command.split(' ', 1)
        if not looks_like_lang(first_word):
            continue
        if position == 0:
            source = first_word[1:]
        else:
            target = first_word[1:]
        command = remainder
    phrase = command

    if len(phrase) > 350:
        if not trigger.admin:
            return bot.reply('Phrase must be under 350 characters.')

    if phrase.strip() == '':
        return bot.reply('You need to specify a string for me to translate!')

    if source == target:
        bot.reply('Language guessing failed, so try suggesting one!')
        return None

    result, source = translate(phrase, source, target,
                               verify_ssl=bot.config.core.verify_ssl)
    if not source:
        return bot.say("Translation failed, probably because of a rate-limit.")
    if sys.version_info.major < 3 and isinstance(result, str):
        result = result.decode('utf-8')  # Python 2 only

    if not result:
        reply = 'The %s to %s translation failed, are you sure you specified valid language abbreviations?' % (source, target)
    else:
        decoded = web.decode(result)  # resolves entities such as &#39;
        reply = '"%s" (%s to %s, translate.google.com)' % (decoded, source, target)
    bot.reply(reply)
Пример #30
0
def tr2(bot, trigger):
    """Translate a phrase, honouring up to two optional ``:lang`` prefixes.

    The command text is ``trigger.group(2)``. Leading words of the form
    ``:xx`` (a colon followed by 2-8 letters) set the source and then the
    target language, which default to ``'auto'`` and ``'en'``. Non-admins
    are limited to phrases of at most 350 characters.
    """
    command = trigger.group(2)
    if not command:
        return bot.reply('You did not give me anything to translate')

    def is_lang_prefix(token):
        return token.startswith(':') and 2 < len(token) < 10 and token[1:].isalpha()

    langs = ['auto', 'en']
    for slot in (0, 1):
        head, space, rest = command.partition(' ')
        if not space:
            break
        if is_lang_prefix(head):
            langs[slot] = head[1:]
            command = rest
    phrase = command

    overlong = len(phrase) > 350
    if overlong and not trigger.admin:
        return bot.reply('Phrase must be under 350 characters.')

    src, dest = langs
    if src == dest:
        bot.reply('Language guessing failed, so try suggesting one!')
        return None

    msg, src = translate(phrase, src, dest)
    if sys.version_info.major < 3 and isinstance(msg, str):
        msg = msg.decode('utf-8')  # Python 2 byte string -> unicode

    if msg:
        # web.decode resolves HTML entities such as &#39;
        answer = '"%s" (%s to %s, translate.google.com)' % (
            web.decode(msg), src, dest)
    else:
        answer = 'The %s to %s translation failed, sorry!' % (src, dest)
    bot.reply(answer)
Пример #31
0
def tr2(bot, trigger):
    """Reply with a translation of the command text.

    ``trigger.group(2)`` may start with one or two ``:lang`` words (a colon
    plus 2-8 letters). The first sets the source language and the second the
    target; the defaults are ``'auto'`` and ``'en'``. Phrases longer than
    350 characters are rejected for non-admins.
    """
    command = trigger.group(2)

    if not command:
        return bot.reply('You did not give me anything to translate')

    def looks_like_lang(word):
        if not word.startswith(':'):
            return False
        if not 2 < len(word) < 10:
            return False
        return word[1:].isalpha()

    source, target = 'auto', 'en'
    for position in (0, 1):
        if ' ' not in command:
            break
        first_word, remainder = command.split(' ', 1)
        if not looks_like_lang(first_word):
            continue
        if position == 0:
            source = first_word[1:]
        else:
            target = first_word[1:]
        command = remainder
    phrase = command

    if len(phrase) > 350:
        if not trigger.admin:
            return bot.reply('Phrase must be under 350 characters.')

    if source == target:
        bot.reply('Language guessing failed, so try suggesting one!')
        return None

    result, source = translate(phrase, source, target)
    if sys.version_info.major < 3 and isinstance(result, str):
        result = result.decode('utf-8')  # Python 2 only

    if not result:
        reply = 'The %s to %s translation failed, sorry!' % (source, target)
    else:
        decoded = web.decode(result)  # resolves entities such as &#39;
        reply = '"%s" (%s to %s, translate.google.com)' % (decoded, source, target)
    bot.reply(reply)
Пример #32
0
def tr(bot, trigger):
    """Translate the phrase captured by the trigger and reply with the result.

    The trigger's groups are (source language, target language, phrase);
    missing languages default to ``'auto'`` and ``'en'``. Non-admins are
    limited to phrases of at most 350 characters.
    """
    in_lang, out_lang, phrase = trigger.groups()

    too_long = len(phrase) > 350
    if too_long and not trigger.admin:
        return bot.reply('Phrase must be under 350 characters.')

    source = in_lang if in_lang else 'auto'
    target = out_lang if out_lang else 'en'

    if source == target:
        # Identical source and target: nothing is sent to the translator.
        bot.reply('Language guessing failed, so try suggesting one!')
        return None

    translated, source = translate(phrase, source, target)
    if sys.version_info.major < 3 and isinstance(translated, str):
        # Python 2 byte string -> unicode
        translated = translated.decode('utf-8')

    if translated:
        # web.decode resolves HTML entities such as &#39;
        answer = '"%s" (%s to %s)' % (web.decode(translated), source, target)
    else:
        answer = 'The %s to %s translation failed, sorry!' % (source, target)
    bot.reply(answer)
Пример #33
0
def find_title(url):
    """Return the title for the given URL, or None if none can be found.

    The page is fetched with ``web.get`` (limited to ``max_bytes``) and
    decoded with the charset named in the ``Content-Type`` header. If that
    is absent or fails, UTF-8 is tried. None is returned when decoding
    fails, when no ``<title>...</title>`` pair is present, or when the title
    is empty after cleanup.
    """
    content, headers = web.get(url, return_headers=True, limit_bytes=max_bytes)
    content_type = headers.get('Content-Type') or ''
    # Raw string: '\S' in a plain literal is an invalid escape sequence.
    encoding_match = re.match(r'.*?charset *= *(\S+)', content_type)

    decoded = None
    if encoding_match:
        # Servers may quote the value or follow it with ';'. Strip those so
        # the codec lookup sees a bare name.
        charset = encoding_match.group(1).strip('"\';')
        try:
            decoded = content.decode(charset)
        except Exception:
            # Best effort: any failure falls through to UTF-8. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            decoded = None
    # No usable charset was declared, so go with UTF-8 or fail silently.
    if decoded is None:
        try:
            decoded = content.decode('utf-8')
        except Exception:
            return None
    content = decoded

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r'<\1title>', content)
    content = quoted_title.sub('', content)

    start = content.find('<title>')
    end = content.find('</title>')
    if start == -1 or end == -1:
        return None
    title = web.decode(content[start + 7:end])
    title = title.strip()[:200]

    title = ' '.join(title.split())  # cleanly remove multiple spaces

    # More cryptic regex substitutions. This one looks to be myano's invention.
    title = re_dcc.sub('', title)

    return title or None
Пример #34
0
Файл: url.py Проект: zeamp/sopel
def find_title(url):
    """Stream *url* and return the text of its ``<title>`` element, or None.

    Text chunks are accumulated until a closing title tag appears or more
    than ``max_bytes`` characters have been collected. Reading also stops at
    the first chunk that arrives as undecoded bytes. None is returned on a
    decode error, when the title tags are missing, or when the title is
    empty after cleanup.
    """
    response = requests.get(url, stream=True)
    page = ''
    try:
        for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
            if isinstance(chunk, bytes):
                break
            page += chunk
            if len(page) > max_bytes or '</title>' in page:
                break
    except UnicodeDecodeError:
        return None  # undecodable data: give up quietly
    finally:
        # The body may be only partially read, so close explicitly.
        response.close()

    # Inherited cleanup using the module-level compiled regexes.
    page = quoted_title.sub('', title_tag_data.sub(r'<\1title>', page))

    open_at = page.find('<title>')
    close_at = page.find('</title>')
    if -1 in (open_at, close_at):
        return None

    raw_title = web.decode(page[open_at + len('<title>'):close_at])
    clipped = raw_title.strip()[:200]
    squeezed = ' '.join(clipped.split())  # collapse whitespace runs
    cleaned = re_dcc.sub('', squeezed)
    return cleaned if cleaned else None
Пример #35
0
def find_title(url, verify=True):
    """Return the title for the given URL, or None if none can be found.

    Special cases:

    * URLs starting with one of the ``YOUTUBE`` prefixes are looked up via
      noembed.com and answered with title and uploader. If that lookup
      fails, the generic path below is used.
    * URLs starting with (but not equal to) one of the ``TWITTER`` prefixes
      are fetched through nitter.net instead. The trailing "nitter" in the
      resulting title is turned back into "Twitter".

    Generic path: the page is streamed until a closing title tag has been
    seen or ``max_bytes`` is exceeded, and the last ``<title>...</title>``
    pair is extracted. *verify* is forwarded to ``requests.get``.
    """
    from urllib.parse import quote

    # special cases

    # youtube
    for prefix in YOUTUBE:
        if url.startswith(prefix):
            try:
                # Percent-encode the URL so its own '&'/'#' characters are
                # not interpreted as part of the noembed query string.
                lookup = 'https://noembed.com/embed?url=' + quote(url, safe='')
                with urllib.request.urlopen(lookup) as response:
                    response_string = response.read().decode("utf8")

                youtube_info = json.loads(response_string)

                return youtube_info['title'] + " | lataaja: " + youtube_info[
                    'author_name'] + " | YouTube "
            except Exception:
                # Best effort: fall back to the generic title lookup. Unlike
                # a bare ``except:``, this lets KeyboardInterrupt/SystemExit
                # propagate.
                print("not a valid URL")

    # twitter: use the open-source client 'Nitter' to get the title
    via_nitter = False
    for prefix in TWITTER:
        if url != prefix and url.startswith(prefix):
            url = 'https://nitter.net' + url[len(prefix):]
            via_nitter = True

    # end of special cases

    try:
        response = requests.get(url,
                                stream=True,
                                verify=verify,
                                headers=default_headers)
    except requests.exceptions.ConnectionError:
        return None

    try:
        content = b''
        for chunk in response.iter_content(chunk_size=512):
            content += chunk
            if b'</title>' in content or len(content) > max_bytes:
                break
    except requests.exceptions.ConnectionError:
        return None
    finally:
        # Always release the connection: the body is usually only partially
        # read, and an error while streaming must not leak the response.
        response.close()
    content = content.decode('utf-8', errors='ignore')

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r'<\1title>', content)
    content = quoted_title.sub('', content)

    start = content.rfind('<title>')
    end = content.rfind('</title>')
    if start == -1 or end == -1:
        return None
    title = web.decode(content[start + 7:end])
    title = title.strip()[:200]

    title = ' '.join(title.split())  # cleanly remove multiple spaces

    # More cryptic regex substitutions. This one looks to be myano's invention.
    title = re_dcc.sub('', title)

    if via_nitter:
        # Only rewrite titles of pages actually fetched through Nitter, so
        # unrelated titles containing "nitter" are left alone.
        title = "Twitter".join(title.rsplit("nitter", 1))

    return title or None