def search(site, term, ssl=False):
    """Search a wiki through its ``/w/api.php`` endpoint.

    This is a generator: nothing is requested until it is iterated.

    Args:
        site: Host name inserted into the request URL.
        term: Search term; it is URL-quoted before being sent.
        ssl: When true the URL uses ``https``, otherwise ``http``.

    Yields:
        ``(title, snippet)`` tuples, one per entry of
        ``data["query"]["search"]``.  Nothing is yielded when the JSON
        helper returns ``None`` or the expected keys are missing.
    """

    def _highlight(fragment):
        # Swap the search-match <span> markers for control characters
        # (presumably IRC colour/bold codes -- confirm against the client),
        # then strip whatever HTML is left.
        marked = fragment.replace("<span class='searchmatch'>", "\x03\x02")
        marked = marked.replace("</span>", "\x03\x02")
        return web.striphtml(marked)

    # Build the request URL.
    url = "http%s://%s/w/api.php?format=json&action=query&list=search&srsearch=%s&srprop=titlesnippet|snippet" % (
        "s" if ssl else "", site, urllib.parse.quote(term))

    # Perform the request and bail out early on an unusable answer.
    data = web.getJSON(url)
    if data is None or "query" not in data or "search" not in data["query"]:
        return

    for hit in data["query"]["search"]:
        yield (_highlight(hit["titlesnippet"]), _highlight(hit["snippet"]))
def get_cve(cve_id):
    """Fetch the detail page of a CVE and extract a few text fields.

    Args:
        cve_id: CVE identifier; it is upper-cased and URL-quoted before
            being appended to ``BASEURL_NIST``.

    Returns:
        A four-item list: the base score (prefixed with ``"Base score: "``),
        the description, the publication date and the last-revised date.
        The meaning of each position follows the comments of the original
        code; the page layout itself is not visible from here.
    """
    page = getURLContent(BASEURL_NIST + quote(cve_id.upper()))
    detail = BeautifulSoup(page).body.find(class_="vuln-detail")

    # All lookups below are positional within the "vuln-detail" element.
    divs = detail.findAll('div')
    score_box = divs[4]

    score = score_box.findAll('div')[0].findAll('a')[0].text.strip()
    description = detail.findAll('p')[0].text
    published = striphtml(divs[0].text).strip()
    revised = striphtml(divs[1].text).strip()

    return [
        "Base score: " + score,
        description,
        published,
        revised,
    ]
def cmd_news(msg):
    """Build a response listing the latest entries of a feed.

    The message arguments are joined with spaces to form a URL.  Feed links
    discovered at that URL are preferred; when none are found the URL itself
    is used as the feed.  Only the first candidate feed is read.

    Args:
        msg: Incoming message; ``msg.args`` and ``msg.channel`` are read.

    Returns:
        A ``Response`` with one message per entry.

    Raises:
        IMException: If no argument was given.
    """
    if len(msg.args) == 0:
        raise IMException("Indicate the URL to visit.")

    url = " ".join(msg.args)
    feeds = list(find_rss_links(url))
    if not feeds:
        feeds = [url]

    res = Response(channel=msg.channel, nomore="No more news from %s" % url)

    for entry in get_last_news(feeds[0]):
        if entry.title:
            headline = "\x02" + web.striphtml(entry.title) + "\x0F"
        else:
            headline = "An article without title"

        # Atom entries carry a datetime-like ``updated`` value; any other
        # entry type is expected to expose a ready-to-print ``pubDate``.
        if isinstance(entry, AtomEntry):
            if entry.updated:
                when = entry.updated.strftime("on %A %d. %B %Y at %H:%M")
            else:
                when = "someday"
        else:
            when = entry.pubDate

        summary = web.striphtml(entry.summary) if entry.summary else ""
        link = entry.link or ""

        res.append_message("%s published %s: %s %s" % (headline, when, summary, link))

    return res
def search(site, term, ssl=False, path="/w/api.php"):
    """Search a wiki through its query API.

    This is a generator: nothing is requested until it is iterated.

    Args:
        site: Host name inserted into the request URL.
        term: Search term; it is URL-quoted before being sent.
        ssl: When true the URL uses ``https``, otherwise ``http``.
        path: Path of the API endpoint on ``site``.

    Yields:
        ``(title, snippet)`` tuples, one per entry of
        ``data["query"]["search"]``.  Nothing is yielded when the JSON
        helper returns ``None`` or the expected keys are missing.
    """

    def _highlight(fragment):
        # Swap the search-match <span> markers for control characters
        # (presumably IRC colour/bold codes -- confirm against the client),
        # then strip whatever HTML is left.
        marked = fragment.replace("<span class='searchmatch'>", "\x03\x02")
        marked = marked.replace("</span>", "\x03\x02")
        return web.striphtml(marked)

    # Build the request URL.
    url = "http%s://%s%s?format=json&action=query&list=search&srsearch=%s&srprop=titlesnippet|snippet" % (
        "s" if ssl else "", site, path, urllib.parse.quote(term))

    # Perform the request and bail out early on an unusable answer.
    data = web.getJSON(url)
    if data is None or "query" not in data or "search" not in data["query"]:
        return

    for hit in data["query"]["search"]:
        yield (_highlight(hit["titlesnippet"]), _highlight(hit["snippet"]))
def cmd_book(msg):
    """Look up a book by title and describe it in a single message.

    Args:
        msg: Incoming message; ``msg.args`` is joined with spaces to form
            the title, ``msg.channel`` is the reply target.

    Returns:
        A ``Response`` holding "<title>, written by <author>: <description>".

    Raises:
        IMException: If no title was given or ``get_book`` returns ``None``.
    """
    if len(msg.args) == 0:
        raise IMException("please give me a title to search")

    book = get_book(" ".join(msg.args))
    if book is None:
        raise IMException("unable to find book named like this")

    res = Response(channel=msg.channel)

    # ``book`` is queried through the DOM API (getElementsByTagName).
    title = book.getElementsByTagName("title")[0].firstChild.nodeValue
    author_elt = book.getElementsByTagName("author")[0]
    author = author_elt.getElementsByTagName("name")[0].firstChild.nodeValue

    # The description element may be empty, in which case it has no child.
    if book.getElementsByTagName("description")[0].firstChild:
        raw_description = book.getElementsByTagName("description")[0].firstChild.nodeValue
    else:
        raw_description = ""

    res.append_message("%s, written by %s: %s" % (title, author, web.striphtml(raw_description)))
    return res
def get_info_yt(msg):
    """Scrape the table at ``URL`` and report each seven-cell row.

    Args:
        msg: Incoming message; only ``msg.channel`` is read.

    Returns:
        A ``Response`` with one message per ``<tr>`` that has exactly seven
        ``<td>`` cells; other rows are ignored.
    """
    soup = BeautifulSoup(getURLContent(URL))
    res = Response(channel=msg.channel, nomore="No more upcoming CTF")

    for row in soup.body.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != 7:
            # Rows with a different cell count cannot fill the
            # seven-slot format string below.
            continue
        fields = tuple(striphtml(cell.text).strip() for cell in cells)
        res.append_message("\x02%s:\x0F from %s type %s at %s. Weight: %s. %s%s" % fields)

    return res
def cmd_news(msg):
    """Build a response listing the latest entries of a feed.

    The message arguments are joined with spaces to form a URL.  Feed links
    discovered at that URL are preferred; when none are found the URL itself
    is used as the feed.  Only the first candidate feed is read.  Each line
    of the response is post-processed by ``reduce_inline``.

    Args:
        msg: Incoming message; ``msg.args`` and ``msg.channel`` are read.

    Returns:
        A ``Response`` with one message per entry.

    Raises:
        IMException: If no argument was given.
    """
    if len(msg.args) == 0:
        raise IMException("Indicate the URL to visit.")

    url = " ".join(msg.args)
    feeds = list(find_rss_links(url))
    if not feeds:
        feeds = [url]

    res = Response(channel=msg.channel,
                   nomore="No more news from %s" % url,
                   line_treat=reduce_inline)

    for entry in get_last_news(feeds[0]):
        if entry.title:
            headline = "\x02" + web.striphtml(entry.title) + "\x0F"
        else:
            headline = "An article without title"

        # Atom entries carry a datetime-like ``updated`` value; any other
        # entry type is expected to expose a ready-to-print ``pubDate``.
        if isinstance(entry, AtomEntry):
            if entry.updated:
                when = entry.updated.strftime("on %A %d. %B %Y at %H:%M")
            else:
                when = "someday"
        else:
            when = entry.pubDate

        summary = web.striphtml(entry.summary) if entry.summary else ""
        link = entry.link or ""

        res.append_message("%s published %s: %s %s" % (headline, when, summary, link))

    return res
def cmd_book(msg):
    """Look up a book by title and describe it in a single message.

    Args:
        msg: Incoming message; ``msg.args`` is joined with spaces to form
            the title, ``msg.channel`` is the reply target.

    Returns:
        A ``Response`` holding "<title>, written by <author>: <description>".

    Raises:
        IMException: If no title was given or ``get_book`` returns ``None``.
    """
    if len(msg.args) == 0:
        raise IMException("please give me a title to search")

    book = get_book(" ".join(msg.args))
    if book is None:
        raise IMException("unable to find book named like this")

    res = Response(channel=msg.channel)

    # ``book`` is queried through the DOM API (getElementsByTagName).
    title = book.getElementsByTagName("title")[0].firstChild.nodeValue
    author_elt = book.getElementsByTagName("author")[0]
    author = author_elt.getElementsByTagName("name")[0].firstChild.nodeValue

    # The description element may be empty, in which case it has no child.
    if book.getElementsByTagName("description")[0].firstChild:
        raw_description = book.getElementsByTagName("description")[0].firstChild.nodeValue
    else:
        raw_description = ""

    res.append_message("%s, written by %s: %s" % (title, author, web.striphtml(raw_description)))
    return res
def compute_line(line, stringTens):
    """Extract the conjugated forms of one tense from an HTML page.

    The tense name is mapped to a list of identifiers through the
    module-level table ``d``; the first identifier selects the
    ``<div id="temps...">`` section of ``line``, which runs up to the next
    ``<div class="conjugBloc"`` marker.

    Args:
        line: HTML text to search.
        stringTens: Name of the requested tense (a key of ``d``).

    Returns:
        A list of strings, one per regex match in the section, with
        ``<b>``/``</b>`` replaced by control characters (presumably IRC
        bold/reset -- confirm) and remaining HTML stripped.

    Raises:
        IMException: If the tense is unknown or maps to an empty list.
        ValueError: If the expected markers are not present in ``line``
            (propagated from ``str.index``).
    """
    # Only a missing key means "unknown tense".  A bare ``except`` here used
    # to hide every other failure (including KeyboardInterrupt) behind the
    # same user-facing message.
    try:
        idTemps = d[stringTens]
    except KeyError:
        raise IMException("le temps demandé n'existe pas") from None

    if len(idTemps) == 0:
        raise IMException("le temps demandé n'existe pas")

    index = line.index('<div id="temps' + idTemps[0] + '\"')
    # Searching from ``index`` yields the absolute end offset directly,
    # without building an intermediate slice.
    endIndex = line.index('<div class=\"conjugBloc\"', index)
    newLine = line[index:endIndex]

    # NOTE: ``[p|/]`` is a character class matching "p", "|" or "/".
    return [
        striphtml(elt.group(1)
                  .replace("<b>", "\x02")
                  .replace("</b>", "\x0F"))
        for elt in re.finditer("[p|/]>([^/]*/b>)", newLine)
    ]
def parse_wikitext(site, cnt, namespaces=dict(), ssl=False):
    """Reduce a chunk of wikitext to plain text.

    Three passes are applied in order: ``{{...}}`` templates are replaced by
    the result of ``get_unwikitextified``; ``[[...]]`` links are replaced by
    text; remaining HTML is stripped.

    Args:
        site: Wiki host, forwarded to ``get_unwikitextified``.
        cnt: Wikitext to convert.
        namespaces: Mapping from namespace prefix to a dict with a
            ``"canonical"`` key.  Links whose prefix maps to the canonical
            name ``"Category"`` are removed.  The default is only read,
            never modified.
        ssl: Forwarded to ``get_unwikitextified``.

    Returns:
        The converted text.
    """
    # Expand templates.  Matches are collected on the text as it is before
    # any replacement; each one is then substituted once.
    for groups in re.findall(r"({{([^{]|\s|({{(.|\s|{{.*?}})*?}})*?)*?}})", cnt):
        template = groups[0]
        cnt = cnt.replace(template, get_unwikitextified(site, template, ssl), 1)

    # Strip [[...]]
    for full, args, lnk in re.findall(r"(\[\[(.*?|)?([^|]*?)\]\])", cnt):
        colon = lnk.find(":")
        if lnk == "":
            # Nothing after the pipe: keep the leading part without its
            # final character.
            substitute = args[:-1]
        elif (colon > 0
              and lnk[:colon] in namespaces
              and namespaces[lnk[:colon]]["canonical"] == "Category"):
            # Category links produce no text.
            substitute = ""
        else:
            substitute = lnk
        cnt = cnt.replace(full, substitute, 1)

    # Strip HTML tags
    return web.striphtml(cnt)
def parse_wikitext(site, cnt, namespaces=dict(), **kwargs):
    """Reduce a chunk of wikitext to plain text.

    Three passes are applied in order: ``{{...}}`` templates are replaced by
    the result of ``get_unwikitextified``; ``[[...]]`` links are replaced by
    text; remaining HTML is stripped.

    Args:
        site: Wiki host, forwarded to ``get_unwikitextified``.
        cnt: Wikitext to convert.
        namespaces: Mapping from namespace prefix to a dict with a
            ``"canonical"`` key.  Links whose prefix maps to the canonical
            name ``"Category"`` are removed.  The default is only read,
            never modified.
        **kwargs: Forwarded unchanged to ``get_unwikitextified``.

    Returns:
        The converted text.
    """
    # Expand templates.  Matches are collected on the text as it is before
    # any replacement; each one is then substituted once.
    for groups in re.findall(r"({{([^{]|\s|({{(.|\s|{{.*?}})*?}})*?)*?}})", cnt):
        template = groups[0]
        cnt = cnt.replace(template, get_unwikitextified(site, template, **kwargs), 1)

    # Strip [[...]]
    for full, args, lnk in re.findall(r"(\[\[(.*?|)?([^|]*?)\]\])", cnt):
        colon = lnk.find(":")
        if lnk == "":
            # Nothing after the pipe: keep the leading part without its
            # final character.
            substitute = args[:-1]
        elif (colon > 0
              and lnk[:colon] in namespaces
              and namespaces[lnk[:colon]]["canonical"] == "Category"):
            # Category links produce no text.
            substitute = ""
        else:
            substitute = lnk
        cnt = cnt.replace(full, substitute, 1)

    # Strip HTML tags
    return web.striphtml(cnt)
def answer(self):
    """Return the "Answer" field of the stored result, HTML-stripped.

    Returns:
        The cleaned answer text, or ``None`` when ``self.ddgres`` has no
        ``"Answer"`` key or its value is falsy.
    """
    if "Answer" in self.ddgres and self.ddgres["Answer"]:
        return web.striphtml(self.ddgres["Answer"])
    return None