def find(self, selector):
    """Find elements using selector, traversing down from self.

    The selector is converted to an XPath expression once with
    ``selector_to_xpath``; that expression is then evaluated against each
    direct child of each element in the current selection, and all matches
    are gathered, in traversal order, into a new instance of this class
    whose ``parent`` is the current selection.

    >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
    >>> d = PyQuery(m)
    >>> d('p').find('em')
    [<em>, <em>]
    >>> d('p').eq(1).find('em')
    [<em>]
    """
    expression = selector_to_xpath(selector)
    matches = []
    for node in self:
        for child in node.getchildren():
            # Each xpath() call returns a list; splice it into the flat result.
            matches.extend(child.xpath(expression))
    return self.__class__(matches, parent=self)
def _filter_only(self, selector, elements, reverse=False, unique=False):
    """Filter the selection set only, as opposed to also including descendants.

    Parameters:
        selector: selector string converted with
            ``selector_to_xpath(selector, 'self::')`` and evaluated against
            each item of ``elements``; ``None`` keeps every item.
        elements: iterable of candidate elements. It is never modified.
        reverse: when true, the filtered results are reversed.
        unique: when true, repeated items are dropped, keeping the first
            occurrence (applied after ``reverse``).

    Returns:
        A new instance of ``self.__class__`` built from the filtered items,
        with ``parent`` set to ``self``.
    """
    if selector is None:
        # Work on a copy: the in-place reverse() below must not reorder the
        # list the caller handed in.
        results = list(elements)
    else:
        xpath = selector_to_xpath(selector, 'self::')
        results = []
        for tag in elements:
            results.extend(tag.xpath(xpath))
    if reverse:
        results.reverse()
    if unique:
        # Membership is tested with ``in`` on a list (equality based) so that
        # items need not be hashable; order of first occurrence is preserved.
        deduped = []
        for item in results:
            if item not in deduped:
                deduped.append(item)
        results = deduped
    return self.__class__(results, parent=self)
if isinstance(context, basestring): try: elements = fromstring(context, self.parser) except Exception, e: raise ValueError('%r, %s' % (e, context)) elif isinstance(context, self.__class__): # copy elements = context[:] elif isinstance(context, list): elements = context elif isinstance(context, etree._Element): elements = [context] # select nodes if elements and selector is not no_default: xpath = selector_to_xpath(selector) results = [tag.xpath(xpath) for tag in elements] # Flatten the results elements = [] for r in results: elements.extend(r) list.__init__(self, elements) def __call__(self, *args): """return a new PyQuery instance """ length = len(args) if length == 0: raise ValueError('You must provide at least a selector') if args[0] == '':
if isinstance(context, basestring): try: elements = fromstring(context, parser) except Exception, e: raise ValueError('%r, %s' % (e, context)) elif isinstance(context, self.__class__): # copy elements = context[:] elif isinstance(context, list): elements = context elif isinstance(context, etree._Element): elements = [context] # select nodes if elements and selector is not no_default: xpath = selector_to_xpath(selector) results = [tag.xpath(xpath) for tag in elements] # Flatten the results elements = [] for r in results: elements.extend(r) list.__init__(self, elements) def __call__(self, *args): """return a new PyQuery instance """ length = len(args) if length == 0: raise ValueError('You must provide at least a selector') if len(args) == 1 and not args[0].startswith('<'):
def __init__(self, *args, **kwargs):
    """Populate the selection from markup, elements, a file or a URL.

    Positional forms:
        ``(context,)``
            ``context`` may be a markup string (parsed with ``fromstring``),
            another instance of this class (its elements are copied), a list
            (used as the element list) or a single ``etree._Element``.
            Any other type yields an empty selection.
        ``(selector, context)``
            As above; ``selector`` is then converted with
            ``selector_to_xpath`` and evaluated against every context
            element, and the matches become the selection.
        ``("http://...",)`` / ``("http://...", data)``
            Shorthand for the ``url`` (and ``data``) keyword arguments.

    Keyword arguments:
        parser: stored on ``self.parser`` and handed to ``fromstring``.
            Defaults to ``"html"`` when a URL is fetched with ``urlopen``
            and no parser was given.
        parent: stored on ``self._parent`` (``no_default`` when absent).
        namespaces: passed to ``xpath()`` when a selector is evaluated.
        filename: path of a file to open and parse.
        url: address to fetch; also recorded on ``self._base_url``.
        opener: callable invoked as ``opener(url)`` instead of ``urlopen``.
        method, data: used with ``url`` when no ``opener`` is given. A dict,
            list or tuple ``data`` is url-encoded; with ``method="get"`` the
            encoded data is appended to the URL's query string instead of
            being sent as the request body.

    Raises:
        ValueError: for keyword arguments other than the ones above without
            ``filename``/``url``, for zero or more than two positional
            arguments, or when a string context cannot be parsed.
    """
    html = None
    elements = []
    opened_file = None  # file object opened here, so we know to close it
    self._base_url = None
    self.parser = kwargs.pop("parser", None)

    # NOTE(review): only "http://" is auto-detected as a URL; a string
    # starting with "https://" falls through and is parsed as markup.
    text_type = str if PY3k else basestring
    if (
        len(args) >= 1
        and isinstance(args[0], text_type)
        and args[0].startswith("http://")
    ):
        kwargs["url"] = args[0]
        if len(args) >= 2:
            kwargs["data"] = args[1]
        args = []

    self._parent = kwargs.pop("parent", no_default)
    namespaces = kwargs.pop("namespaces", {})

    if kwargs:
        # specific case to get the dom
        if "filename" in kwargs:
            html = opened_file = open(kwargs["filename"])
        elif "url" in kwargs:
            url = kwargs.pop("url")
            if "opener" in kwargs:
                opener = kwargs.pop("opener")
                html = opener(url)
            else:
                method = kwargs.get("method")
                data = kwargs.get("data")
                if isinstance(data, (dict, list, tuple)):
                    data = urlencode(data)

                if (
                    isinstance(method, basestring)
                    and method.lower() == "get"
                    and data
                ):
                    # GET: move the payload into the query string.
                    if "?" not in url:
                        url += "?"
                    elif url[-1] not in ("?", "&"):
                        url += "&"
                    url += data
                    data = None

                if data and PY3k:
                    data = data.encode("utf-8")

                html = urlopen(url, data)
                if not self.parser:
                    self.parser = "html"
            self._base_url = url
        else:
            raise ValueError("Invalid keyword arguments %s" % kwargs)

        try:
            elements = fromstring(html, self.parser)
        finally:
            # Release the handle we opened for ``filename``.
            # Assumes fromstring has finished reading by the time it
            # returns -- TODO confirm against fromstring's implementation.
            if opened_file is not None:
                opened_file.close()
    else:
        # get nodes
        # determine context and selector if any
        selector = context = no_default
        if len(args) == 1:
            context = args[0]
        elif len(args) == 2:
            selector, context = args
        else:
            raise ValueError("You can't do that." + " Please, provide arguments")

        # get context
        if isinstance(context, basestring):
            try:
                elements = fromstring(context, self.parser)
            except Exception:
                raise ValueError(repr(context))
        elif isinstance(context, self.__class__):
            # copy
            elements = context[:]
        elif isinstance(context, list):
            elements = context
        elif isinstance(context, etree._Element):
            elements = [context]

        # select nodes
        if elements and selector is not no_default:
            xpath = selector_to_xpath(selector)
            # Evaluate against every context element and flatten the matches.
            elements = [
                match
                for tag in elements
                for match in tag.xpath(xpath, namespaces=namespaces)
            ]

    list.__init__(self, elements)