class Py_Query:
    """Convenience wrapper around a ``PyQuery`` selection.

    An instance holds the selection in ``self.pyquery`` (``None`` when the
    html could not be parsed), the html it was built from in
    ``self.original_html`` and the last captured exception in ``self.error``.
    Most accessors return an empty value (``''``, ``[]`` or ``0``) instead of
    raising when there is no selection.
    """

    pyquery: PyQuery

    def __init__(self, html='', pyquery=None, selector=None):
        """Wrap an existing ``pyquery`` object or parse ``html`` into one.

        Args:
            html: Markup to parse; only used when ``pyquery`` is falsy.
            pyquery: Ready-made selection to wrap.
            selector: Optional selector applied straight away; the wrapped
                selection is then narrowed to its matches.
        """
        self.original_html = html
        self.error = None
        # Truthiness test (not ``is None``): any falsy ``pyquery`` falls back
        # to parsing ``html``.
        # NOTE(review): presumably an empty PyQuery selection is falsy too, in
        # which case it is replaced by the parse result of ``html`` - confirm.
        self.pyquery = pyquery or self.set_pyquery_from_html(html)
        if selector:
            self.pyquery = self.query(selector).pyquery

    def attribute(self, key):
        """Return the attribute ``key`` looked up via ``attributes().get``."""
        return self.attributes().get(key)

    def attribute_base64(self, key):
        """Return attribute ``key`` passed through ``base64_to_str``."""
        return base64_to_str(self.attributes().get(key))

    def attribute_base64_py_query(self, key):
        """Return a new ``Py_Query`` built from the decoded attribute ``key``."""
        return Py_Query(self.attribute_base64(key))

    def attribute_base64_json(self, key):
        """Return the decoded attribute ``key`` passed through ``json_parse``."""
        return json_parse(self.attribute_base64(key))

    @index_by
    def attributes(self):
        """Return the attributes of the selected element(s).

        Returns ``[]`` without a selection, the ``attrib`` mapping of the
        element when exactly one is selected, otherwise a list with one
        ``attrib`` mapping per element.

        NOTE(review): the result is post-processed by ``@index_by``; callers
        such as ``attribute()`` rely on it exposing ``.get`` - confirm against
        the decorator.
        """
        if self.pyquery is None:
            return []
        if self.single_element():
            return self.pyquery[0].attrib
        return [element.attrib for element in self.pyquery]

    def children(self, selector='*'):
        """Return the children matching ``selector``, each as a ``Py_Query``."""
        if self.pyquery is None:
            return []
        return [Py_Query(pyquery=child)
                for child in self.pyquery.children(selector).items()]

    def dom(self, show_attributes=True, use_children_node=True):
        """Return ``Py_Query_Dom(...).dom()`` for this selection."""
        return Py_Query_Dom(self,
                            show_attributes=show_attributes,
                            use_children_node=use_children_node).dom()

    def elements(self, selector='*'):
        """Return the matches of ``selector`` as a list of ``Py_Query``."""
        if self.pyquery is None:
            return []
        return self.query(selector).items()

    def empty(self):
        """Return True when ``size()`` is 0."""
        return self.size() == 0

    def html(self):
        """Return the outer html of the selection, or ``''`` without one."""
        if self.pyquery:
            return self.outer_html()
        return ''

    def inner_html(self):
        """Return ``self.pyquery.html()``, or ``''`` without a selection."""
        if self.pyquery:
            return self.pyquery.html()
        return ''

    def items(self):
        """Return every selected element wrapped in its own ``Py_Query``."""
        if not self.pyquery:
            return []
        return [Py_Query(pyquery=item) for item in self.pyquery.items()]

    def indexed_by_attribute(self, attribute_name, return_unique_list=False):
        """Map each item's ``attribute_name`` value to the item.

        Items whose attribute value is falsy are skipped; when several items
        share a value the last one wins. With ``return_unique_list`` the
        mapping is passed through ``list_set`` before being returned.
        """
        result = {}
        for item in self.items():
            attribute_value = item.attribute(attribute_name)
            if attribute_value:
                result[attribute_value] = item
        if return_unique_list:
            return list_set(result)
        return result

    def print(self):
        """Return ``Py_Query_Dom(self).print()``."""
        return Py_Query_Dom(self).print()

    def outer_html(self):
        """Return ``self.pyquery.outer_html()``, or ``''`` without a selection."""
        if self.pyquery:
            return self.pyquery.outer_html()
        return ''

    def query(self, selector='*'):
        """Run ``selector`` against the selection and wrap the result.

        Any exception raised while querying is stored in ``self.error``. When
        there is no selection, or the query raised, ``Py_Query('')`` is
        returned instead.
        """
        if self.pyquery:
            try:
                self.error = None
                return Py_Query(pyquery=self.pyquery(selector))
            except Exception as error:
                self.error = error
        return Py_Query('')

    def query_html(self, selector='*'):
        """Return the html of the matches of ``selector``."""
        return self.query(selector=selector).html()

    def set_pyquery_from_html(self, html):
        """Parse ``html`` into ``self.pyquery`` and return it.

        Resets ``self.pyquery`` and ``self.error`` first. Returns ``None`` and
        records the reason in ``self.error`` when ``html`` is not a string or
        ``PyQuery(html)`` raises.
        """
        self.pyquery = None
        self.error = None
        if not isinstance(html, str):
            self.error = Exception("Invalid html value")
            return None
        try:
            self.pyquery = PyQuery(html)
        except Exception as error:
            self.error = error
            return None
        return self.pyquery

    def size(self):
        """Return the number of selected elements (0 without a selection)."""
        if self.pyquery:
            return len(self.pyquery)
        return 0

    def set_value(self, selector, value):
        """Call ``value(value)`` on every match of ``selector``.

        Returns the list of matched ``Py_Query`` items (``[]`` without a
        selection).
        """
        if not self.pyquery:
            return []
        matches = self.query(selector).items()
        for match in matches:
            match.value(value)
        return matches

    def single_element(self):
        """Return True when exactly one element is selected."""
        return self.size() == 1

    def serialize_to_form(self):
        """Return ``self.pyquery.serialize()``, or ``''`` without a selection.

        The guard keeps this method consistent with the other accessors,
        which return an empty value rather than failing when parsing did not
        produce a selection.
        """
        if self.pyquery is None:
            return ''
        return self.pyquery.serialize()

    def scripts(self):
        """Return the ``script`` elements indexed by their ``src`` attribute."""
        # todo: move to separate class (one more focused the page's content)
        return self.query('script').indexed_by_attribute('src')

    def tag(self):
        """Return the tag of the first selected element, or ``''`` if none."""
        if self.size() == 0:
            return ''
        return self.pyquery[0].tag                  # todo double check the use of [0] in the code below

    def tags(self):
        """Return the tag of every selected element."""
        if self.pyquery is None:
            return []
        return [element.tag for element in self.pyquery]

    def text(self, selector=None):
        """Return the text of the selection, or of the matches of ``selector``.

        Returns ``''`` without a selection.
        """
        if not self.pyquery:
            return ''
        if selector:
            return self.query(selector).text()
        return self.pyquery.text()

    def value(self, value=None):
        """Set ``value`` on the selection through ``PyQuery.val``; return self.

        NOTE: the check is on truthiness, so falsy values (``None``, ``''``,
        ``0``) are ignored and an empty string cannot be used to clear a
        value through this method.
        """
        if self.pyquery:
            if value:
                self.pyquery.val(value)
        return self

    def __repr__(self):
        """Return a one-line summary of the selection followed by its html."""
        return (f'(Py_Query) tag: {self.tag()} | size: {self.size()} | '
                f'attributes: {len(self.attributes())} | '
                f'elements: {len(self.elements())} | '
                f'children: {len(self.children())} \n\n {self.html()}')

    # todo: move to separate class (one more focused the page's content)
    # misc html helpers and page content

    def body(self):
        """Return the result of querying for ``body``."""
        return self.query('body')

    def title(self):
        """Return the text of the ``title`` element."""
        return self.text('title')