def find_selector(child):
    """
    Build the chain of selector parts leading from the top of the tree to ``child``.

    One entry is produced per element, outermost ancestor first and ``child``
    last. The walk stops at the element whose parent is missing or is named
    ``[document]``. Each entry lists, in order: the escaped tag name, an
    ``[id="..."]`` part when the id is non-empty, one ``.class`` part per
    class name, and a bare ``[attr]`` part for every other attribute.

    :param child: a BeautifulSoup object
    :type child: BeautifulSoup
    :return: the selector list made of tags and attributes that child has
    :rtype: [[str]]
    """
    if not child:
        return []

    chain = []
    node = child
    # Walk upwards iteratively (instead of recursing once per ancestor) so a
    # deeply nested document cannot exhaust the interpreter's recursion limit.
    while True:
        chain.append(_element_selector(node))
        parent = node.parent
        if not parent or parent.name == "[document]":
            break
        node = parent

    # Entries were collected child-first; callers expect root-first order.
    chain.reverse()
    return chain


def _element_selector(element):
    """Return the selector parts (tag, id, classes, other attribute names) of one element."""
    attrs = element.attrs
    parts = [escape(element.name)]

    id_ = attrs.get("id")
    if id_:
        # Use the [id="..."] form rather than "#id": per the original author's
        # note, soupsieve mishandles "#" when the id starts with a number.
        parts.append('[id="{}"]'.format(escape(id_)))

    class_ = attrs.get("class")
    if class_:
        # Some parsers/settings keep "class" as one whitespace-separated string
        # instead of a list; split it so we do not iterate over characters.
        if isinstance(class_, str):
            class_ = class_.split()
        # Skip empty names: a bare "." is not a valid selector.
        parts.extend(".{}".format(escape(cls)) for cls in class_ if cls)

    parts.extend(
        "[{}]".format(escape(attr))
        for attr in attrs
        if attr and attr not in ("class", "id")
    )
    return parts
def test_escape_wide_unicode(self):
    """Check that a wide (non-BMP) character is left as-is while the space before it is escaped."""

    expected = 'Emoji\\ \U0001F60D'
    actual = sv.escape('Emoji \U0001F60D')
    self.assertEqual(expected, actual)
def test_escape_special(self):
    """Check that brackets, braces, parentheses and spaces each get a backslash."""

    expected = r'\{\}\[\]\ \(\)'
    actual = sv.escape('{}[] ()')
    self.assertEqual(expected, actual)
def test_escape_ctrl(self):
    """Check that a control character is emitted as a hex code point escape."""

    expected = r'\1 test'
    actual = sv.escape('\x01test')
    self.assertEqual(expected, actual)
def test_escape_null(self):
    """Check that a null character is replaced by U+FFFD."""

    expected = '\ufffdtest'
    actual = sv.escape('\x00test')
    self.assertEqual(expected, actual)
def test_escape_numbers(self):
    """Check escaping of a digit at the start, after one hyphen, and after two hyphens."""

    # (expected, raw) pairs, checked in order.
    cases = (
        (r'\33 ', '3'),
        (r'-\33 ', '-3'),
        (r'--3', '--3'),
    )
    for expected, raw in cases:
        self.assertEqual(expected, sv.escape(raw))
def test_escape_hyphen(self):
    """Check that a lone hyphen is escaped while a double hyphen is left untouched."""

    # (expected, raw) pairs, checked in order.
    cases = (
        (r'\-', '-'),
        (r'--', '--'),
    )
    for expected, raw in cases:
        self.assertEqual(expected, sv.escape(raw))