Python striphtml示例，nemubot.tools.web.striphtml Python示例

示例#1

0

显示文件

def search(site, term, ssl=False):
    """Yield ``(title, snippet)`` pairs for *term* from *site*'s search API.

    The request goes to ``/w/api.php`` on *site* (``https`` when *ssl* is
    true, ``http`` otherwise) and is fetched with ``web.getJSON``.  Nothing
    is yielded when the reply is ``None`` or has no ``query.search`` entry.
    """
    def _mark_matches(html):
        # Swap the search-match <span> wrappers for the "\x03\x02" control
        # sequence (presumably IRC formatting codes -- confirm), then strip
        # whatever markup is left.
        marked = html.replace("<span class='searchmatch'>", "\x03\x02")
        marked = marked.replace("</span>", "\x03\x02")
        return web.striphtml(marked)

    # Build the URL
    scheme_suffix = "s" if ssl else ""
    url = "http%s://%s/w/api.php?format=json&action=query&list=search&srsearch=%s&srprop=titlesnippet|snippet" % (
        scheme_suffix, site, urllib.parse.quote(term))

    # Make the request
    data = web.getJSON(url)

    if data is None or "query" not in data or "search" not in data["query"]:
        return

    for hit in data["query"]["search"]:
        yield (_mark_matches(hit["titlesnippet"]),
               _mark_matches(hit["snippet"]))

示例#2

0

显示文件

def get_cve(cve_id):
    """Return a list of four strings describing the CVE *cve_id*.

    The page at ``BASEURL_NIST`` followed by the upper-cased, quoted
    identifier is downloaded and parsed; the values are read from the
    element carrying the ``vuln-detail`` class.  In order: the base score
    (prefixed with ``"Base score: "``), the description, the publication
    date and the last-revised date.
    """
    search_url = BASEURL_NIST + quote(cve_id.upper())
    page = getURLContent(search_url)

    soup = BeautifulSoup(page)
    vuln = soup.body.find(class_="vuln-detail")

    # All <div> descendants of the detail element, looked up once.
    detail_divs = vuln.findAll('div')
    cvss = detail_divs[4]

    base_score = cvss.findAll('div')[0].findAll('a')[0].text.strip()
    description = vuln.findAll('p')[0].text
    published = striphtml(detail_divs[0].text).strip()
    last_revised = striphtml(detail_divs[1].text).strip()

    return [
        "Base score: " + base_score,
        description,
        published,
        last_revised,
    ]

示例#3

0

显示文件

def cmd_news(msg):
    """Build a Response listing the latest news for the URL given in *msg*.

    The arguments of *msg* are joined with spaces to form the URL.  The
    first link returned by ``find_rss_links`` is used as the feed, or the
    URL itself when no link is found.

    Raises IMException when *msg* carries no argument.
    """
    if len(msg.args) == 0:
        raise IMException("Indicate the URL to visit.")

    url = " ".join(msg.args)
    links = list(find_rss_links(url))
    if not links:
        links = [url]

    res = Response(channel=msg.channel, nomore="No more news from %s" % url)
    for entry in get_last_news(links[0]):
        # Title, wrapped in the \x02 ... \x0F control characters.
        if entry.title:
            title = "\x02" + web.striphtml(entry.title) + "\x0F"
        else:
            title = "An article without title"

        # Atom entries expose a datetime-like `updated`; other entries
        # provide `pubDate`, which is used as-is.
        if isinstance(entry, AtomEntry):
            if entry.updated:
                when = entry.updated.strftime("on %A %d. %B %Y at %H:%M")
            else:
                when = "someday"
        else:
            when = entry.pubDate

        summary = web.striphtml(entry.summary) if entry.summary else ""
        link = entry.link if entry.link else ""

        res.append_message("%s published %s: %s %s" % (title, when, summary, link))
    return res

示例#4

0

显示文件

def search(site, term, ssl=False, path="/w/api.php"):
    """Yield ``(title, snippet)`` pairs for *term* from *site*'s search API.

    The API is reached at *path* on *site*, over ``https`` when *ssl* is
    true and ``http`` otherwise, and the reply is fetched with
    ``web.getJSON``.  Nothing is yielded when the reply is ``None`` or has
    no ``query.search`` entry.
    """
    open_tag = "<span class='searchmatch'>"
    close_tag = "</span>"
    # Both tags are replaced by the same control sequence (presumably IRC
    # formatting codes -- confirm) before the remaining HTML is stripped.
    marker = "\x03\x02"

    # Build the URL
    url = "http%s://%s%s?format=json&action=query&list=search&srsearch=%s&srprop=titlesnippet|snippet" % (
        "s" if ssl else "", site, path, urllib.parse.quote(term))

    # Make the request
    data = web.getJSON(url)

    has_results = (data is not None and "query" in data
                   and "search" in data["query"])
    if not has_results:
        return

    for itm in data["query"]["search"]:
        title = web.striphtml(
            itm["titlesnippet"].replace(open_tag, marker).replace(close_tag, marker))
        snippet = web.striphtml(
            itm["snippet"].replace(open_tag, marker).replace(close_tag, marker))
        yield (title, snippet)

示例#5

0

显示文件

def cmd_book(msg):
    """Build a Response describing the book whose title is given in *msg*.

    The arguments of *msg* are joined with spaces and passed to
    ``get_book``; the title, author name and description are then read
    from the returned DOM element.

    Raises IMException when no argument is given or no book is found.
    """
    if len(msg.args) == 0:
        raise IMException("please give me a title to search")

    book = get_book(" ".join(msg.args))
    if book is None:
        raise IMException("unable to find book named like this")

    res = Response(channel=msg.channel)

    title = book.getElementsByTagName("title")[0].firstChild.nodeValue
    author_node = book.getElementsByTagName("author")[0]
    author = author_node.getElementsByTagName("name")[0].firstChild.nodeValue

    # The description element may be empty, in which case it has no child.
    description_text = book.getElementsByTagName("description")[0].firstChild
    if description_text:
        description = web.striphtml(description_text.nodeValue)
    else:
        description = web.striphtml("")

    res.append_message("%s, written by %s: %s" % (title, author, description))
    return res

示例#6

0

显示文件

文件： ctfs.py 项目： pombredanne/nemubot

def get_info_yt(msg):
    """Build a Response with one message per seven-cell table row of ``URL``.

    The page at ``URL`` is downloaded and parsed; every ``<tr>`` holding
    exactly seven ``<td>`` cells yields one message built from the stripped
    text of those cells.  Other rows are ignored.
    """
    page = getURLContent(URL)
    soup = BeautifulSoup(page)

    res = Response(channel=msg.channel, nomore="No more upcoming CTF")

    for row in soup.body.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != 7:
            # The message template below needs exactly seven fields.
            continue
        fields = tuple(striphtml(cell.text).strip() for cell in cells)
        res.append_message(
            "\x02%s:\x0F from %s type %s at %s. Weight: %s. %s%s" % fields)

    return res

示例#7

0

显示文件

文件： news.py 项目： pombredanne/nemubot

def cmd_news(msg):
    """Build a Response listing the latest news for the URL given in *msg*.

    The arguments of *msg* are joined with spaces to form the URL.  The
    first link returned by ``find_rss_links`` is used as the feed, or the
    URL itself when no link is found.  The Response is created with
    ``line_treat=reduce_inline``.

    Raises IMException when *msg* carries no argument.
    """
    def _title_of(item):
        # Title wrapped in the \x02 ... \x0F control characters, or a
        # placeholder when the item has none.
        if item.title:
            return "\x02" + web.striphtml(item.title) + "\x0F"
        return "An article without title"

    def _date_of(item):
        # Atom entries expose a datetime-like `updated`; other entries
        # provide `pubDate`, which is used as-is.
        if not isinstance(item, AtomEntry):
            return item.pubDate
        if item.updated:
            return item.updated.strftime("on %A %d. %B %Y at %H:%M")
        return "someday"

    if len(msg.args) == 0:
        raise IMException("Indicate the URL to visit.")

    url = " ".join(msg.args)
    links = list(find_rss_links(url))
    if not links:
        links = [url]

    res = Response(channel=msg.channel,
                   nomore="No more news from %s" % url,
                   line_treat=reduce_inline)
    for item in get_last_news(links[0]):
        title = _title_of(item)
        when = _date_of(item)
        summary = web.striphtml(item.summary) if item.summary else ""
        link = item.link if item.link else ""
        res.append_message("%s published %s: %s %s" % (title, when, summary, link))

    return res

示例#8

0

显示文件

def cmd_book(msg):
    """Build a Response describing the book whose title is given in *msg*.

    The arguments of *msg* are joined with spaces and passed to
    ``get_book``; the title, author name and description are then read
    from the returned DOM element.

    Raises IMException when no argument is given or no book is found.
    """
    if len(msg.args) == 0:
        raise IMException("please give me a title to search")

    wanted_title = " ".join(msg.args)
    book = get_book(wanted_title)
    if book is None:
        raise IMException("unable to find book named like this")

    res = Response(channel=msg.channel)

    title_element = book.getElementsByTagName("title")[0]
    title = title_element.firstChild.nodeValue

    author_element = book.getElementsByTagName("author")[0]
    name_element = author_element.getElementsByTagName("name")[0]
    author = name_element.firstChild.nodeValue

    # An empty description element has no child node; fall back to "".
    description_child = book.getElementsByTagName("description")[0].firstChild
    raw_description = description_child.nodeValue if description_child else ""
    description = web.striphtml(raw_description)

    res.append_message("%s, written by %s: %s" % (title, author, description))
    return res

示例#9

0

显示文件

文件： conjugaison.py 项目： pombredanne/nemubot

def compute_line(line, stringTens):
    """Extract the conjugated forms of one tense from an HTML page.

    Args:
        line: the HTML source, as a single string.
        stringTens: key of the requested tense in the module-level
            mapping ``d``.

    Returns:
        A list of strings, one per match found in the tense's section,
        with ``<b>``/``</b>`` replaced by the ``\\x02``/``\\x0F`` control
        characters and the remaining HTML stripped.

    Raises:
        IMException: when the tense is unknown or maps to an empty value.
        ValueError: when the expected markers are absent from *line*
            (raised by ``str.index``).
    """
    # Only a missing key means "unknown tense"; a bare ``except`` would also
    # hide KeyboardInterrupt/SystemExit and genuine programming errors.
    try:
        idTemps = d[stringTens]
    except KeyError as err:
        raise IMException("le temps demandé n'existe pas") from err

    if len(idTemps) == 0:
        raise IMException("le temps demandé n'existe pas")

    # The section starts at the tense's <div> and runs up to the next
    # "conjugBloc" <div>; search from `index` directly instead of slicing.
    index = line.index('<div id="temps' + idTemps[0] + '"')
    endIndex = line.index('<div class="conjugBloc"', index)
    newLine = line[index:endIndex]

    # NOTE(review): "[p|/]" is a character class, so it also matches a
    # literal "|" -- kept unchanged; confirm whether that was intended.
    return [striphtml(elt.group(1)
                      .replace("<b>", "\x02")
                      .replace("</b>", "\x0F"))
            for elt in re.finditer("[p|/]>([^/]*/b>)", newLine)]

示例#10

0

显示文件

def parse_wikitext(site, cnt, namespaces=dict(), ssl=False):
    """Return *cnt* with templates, links and HTML tags reduced to text.

    Each ``{{...}}`` match is replaced by the result of
    ``get_unwikitextified(site, match, ssl)``.  Each ``[[...]]`` match is
    then replaced by its captured text, or by nothing when its prefix
    before ``:`` is a key of *namespaces* whose ``"canonical"`` value is
    ``"Category"``.  Finally ``web.striphtml`` is applied.
    """
    # Expand {{...}}
    templates = re.findall(r"({{([^{]|\s|({{(.|\s|{{.*?}})*?}})*?)*?}})", cnt)
    for template, *_ in templates:
        cnt = cnt.replace(template, get_unwikitextified(site, template, ssl), 1)

    # Strip [[...]]
    for full, args, lnk in re.findall(r"(\[\[(.*?|)?([^|]*?)\]\])", cnt):
        if lnk == "":
            # No trailing part captured: keep the leading part minus its
            # last character.
            replacement = args[:-1]
        else:
            colon = lnk.find(":")
            is_category = (
                colon > 0
                and lnk[:colon] in namespaces
                and namespaces[lnk[:colon]]["canonical"] == "Category")
            replacement = "" if is_category else lnk
        cnt = cnt.replace(full, replacement, 1)

    # Strip HTML tags
    return web.striphtml(cnt)

示例#11

0

显示文件

def parse_wikitext(site, cnt, namespaces=dict(), **kwargs):
    """Return *cnt* with templates, links and HTML tags reduced to text.

    Each ``{{...}}`` match is replaced by the result of
    ``get_unwikitextified(site, match, **kwargs)``.  Each ``[[...]]`` match
    is then replaced by its captured text, or by nothing when its prefix
    before ``:`` is a key of *namespaces* whose ``"canonical"`` value is
    ``"Category"``.  Finally ``web.striphtml`` is applied.
    """
    template_pattern = r"({{([^{]|\s|({{(.|\s|{{.*?}})*?}})*?)*?}})"
    link_pattern = r"(\[\[(.*?|)?([^|]*?)\]\])"

    # Expand {{...}}
    for template, *_ in re.findall(template_pattern, cnt):
        expanded = get_unwikitextified(site, template, **kwargs)
        cnt = cnt.replace(template, expanded, 1)

    # Strip [[...]]
    for whole, leading, target in re.findall(link_pattern, cnt):
        colon_at = target.find(":")
        if target == "":
            # No trailing part captured: keep the leading part minus its
            # last character.
            cnt = cnt.replace(whole, leading[:-1], 1)
            continue

        if colon_at > 0:
            prefix = target[:colon_at]
            if prefix in namespaces and namespaces[prefix]["canonical"] == "Category":
                # Links into the Category namespace are dropped entirely.
                cnt = cnt.replace(whole, "", 1)
                continue

        cnt = cnt.replace(whole, target, 1)

    # Strip HTML tags
    cnt = web.striphtml(cnt)
    return cnt

示例#12

0

显示文件

 def answer(self):
     """Return the ``"Answer"`` entry of ``self.ddgres`` with HTML stripped.

     Returns None when the entry is missing or falsy.
     """
     if "Answer" in self.ddgres and self.ddgres["Answer"]:
         return web.striphtml(self.ddgres["Answer"])
     return None

示例#13

0

显示文件

 def answer(self):
     """Return the HTML-stripped ``"Answer"`` value held in ``self.ddgres``.

     When ``self.ddgres`` has no ``"Answer"`` key, or its value is falsy,
     None is returned instead.
     """
     has_answer = "Answer" in self.ddgres and self.ddgres["Answer"]
     if not has_answer:
         return None
     return web.striphtml(self.ddgres["Answer"])