Python HtmlElement.append примеры использования

Язык программирования: Python

Пространство имен/Пакет: lxml.html

Класс/Тип: HtmlElement

Метод/Функция: append

Примеров на hotexamples.com: 5

Python HtmlElement.append - 5 примеров найдено. Это лучшие примеры Python кода для lxml.html.HtmlElement.append, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

HtmlElement(6)

append(4)

get_element_by_id(4)

addnext(1)

Пример #1

Показать файл

Файл: content.py Проект: rivingtondigital/goose3

    def calculate_best_node(self, doc):
        """Score the text-bearing candidate nodes of ``doc`` and gather them.

        Candidates come from ``self.nodes_to_check(doc)``.  A candidate is
        kept when its text has more than two stopwords and it is not flagged
        by ``self.is_highlink_density``.  Every kept node is then scored as
        ``int(stopword_count + boost_score)`` where ``boost_score`` is:

        * ``50 / k`` for the k-th node accepted by ``self.is_boostable``;
        * when more than 15 nodes were kept, a penalty of ``-booster ** 2``
          for nodes in the last quarter of the list (replaced by ``+5`` once
          the penalty magnitude exceeds 40).  The penalty, when it applies,
          overrides the boost above.

        The score is recorded through ``self.update_score`` and
        ``self.update_node_count``.

        @param doc : parsed document handed to ``self.nodes_to_check``
        @return    : a new ``HtmlElement`` to which every kept node with a
                     not-yet-seen text has been appended, in document order
                     of the candidates
        """
        nodes_with_text = []
        for node in self.nodes_to_check(doc):
            text_node = self.parser.getText(node)
            word_stats = self.stopwords_class(
                language=self.get_language()).get_stopword_count(text_node)
            high_link_density = self.is_highlink_density(node)
            if word_stats.get_stopword_count() > 2 and not high_link_density:
                nodes_with_text.append(node)

        nodes_number = len(nodes_with_text)
        # Never updated anywhere in this method; kept as a named term of the
        # penalty formula below.
        negative_scoring = 0
        bottom_negativescore_nodes = nodes_number * 0.25
        starting_boost = 1.0

        parent_nodes = []
        dupe_track = []
        # ``i`` is the node's position in the list.  The previous version
        # never advanced it, which made the tail penalty below unreachable.
        for i, node in enumerate(nodes_with_text):
            boost_score = 0.0
            # boost: each successive boostable node gets a smaller bonus
            if self.is_boostable(node):
                boost_score = (1.0 / starting_boost) * 50
                starting_boost += 1
            # nodes_number: penalise nodes in the last quarter of long lists
            if nodes_number > 15:
                remaining = nodes_number - i
                if remaining <= bottom_negativescore_nodes:
                    booster = float(bottom_negativescore_nodes - remaining)
                    boost_score = -(booster ** 2.0)
                    negscore = abs(boost_score) + negative_scoring
                    if negscore > 40:
                        boost_score = 5.0

            text_node = self.parser.getText(node)
            word_stats = self.stopwords_class(
                language=self.get_language()).get_stopword_count(text_node)
            upscore = int(word_stats.get_stopword_count() + boost_score)

            self.update_score(node, upscore)
            self.update_node_count(node, 1)

            # Keep only the first node for any given text.
            if text_node not in dupe_track:
                parent_nodes.append(node)
                dupe_track.append(text_node)

        top_node = HtmlElement()
        for itm in parent_nodes:
            score = self.get_score(itm)
            # NOTE(review): looks like leftover debug output -- confirm
            # whether callers rely on it before removing.
            print("{}\n--------------------\n{}\n".format(
                score, itm.text_content()))
            # NOTE(review): with lxml, append() re-parents the node, i.e. it
            # is taken out of ``doc`` -- confirm this is intended.
            top_node.append(itm)

        return top_node

Пример #2

Показать файл

Файл: main.py Проект: miohtama/tweetantistorm

def set_inner_html(elem: HtmlElement, html: str):
    """Replace the inner HTML of an lxml element.

    All existing content of ``elem`` -- its child elements and the text that
    precedes the first child -- is discarded, and the element parsed from
    ``html`` becomes the only child.  Attributes and the tail of ``elem``
    itself are left alone.

    :param elem: Element whose content is replaced in place.
    :param html: Markup for the new content; it is parsed with
        ``fragment_fromstring(html)``, so whatever that call rejects is
        rejected here too (presumably anything that is not a single
        element -- verify against the lxml documentation).
    """

    # Parse first: if the markup is rejected, ``elem`` is left untouched
    # instead of being emptied and then abandoned.
    content = fragment_fromstring(html)

    # Clear the element contents.  Iterate over a snapshot because the
    # children are removed while looping.
    for child in list(elem):
        elem.remove(child)
    # Text before the first child is stored on the element itself, not on a
    # child, so removing the children alone would leave it behind.
    elem.text = None

    # Add the new contents
    elem.append(content)

Пример #3

Показать файл

def xhtml(div: HtmlElement, data: PostData) -> str:
    title = escape(data.title)
    date = data.date.strftime("%Y-%m-%d")
    url = ArchiveURL + data.href
    div.tag = "body"
    div.append(
        fromstring(
            f'<p class="postwebpage"><a href="{url}">[{date}]</a></p>\n\n'))
    body = html_to_string(div)
    if data.kind == "miscellaneous":
        body = pants.process(body)
    return f"""<?xml version="1.0" encoding="utf-8" standalone="no"?>

Пример #4

Показать файл

def reduce_tree(node):
    """Return a pruned copy of ``node`` that keeps only the important tags.

    The copy of ``node`` itself always carries over its tag, attributes and
    leading text.  Each child element is reduced recursively; a child whose
    tag is one of ``a``, ``dl``, ``h1`` or ``h3`` (case-insensitive) is kept,
    while any other child is dropped and replaced by its own reduced
    children.  Tail text is not copied, and neither is the leading text of a
    dropped child.  ``node`` is not modified.

    Children whose ``tag`` is not a string (in lxml these are comments and
    processing instructions) are skipped.
    """
    newnode = HtmlElement(attrib=node.attrib)
    newnode.tag = node.tag
    newnode.text = node.text
    for child in node:
        # Comments / processing instructions have a callable as their tag,
        # so ``.lower()`` below (and copying the tag in the recursive call)
        # would fail on them.
        if not isinstance(child.tag, str):
            continue
        newchild = reduce_tree(child)
        if child.tag.lower() in ('a', 'dl', 'h1', 'h3'):
            newnode.append(newchild)
        else:
            # we don't want this node, so get its children instead;
            # snapshot first because append() moves them out of newchild
            for grandchild in list(newchild):
                newnode.append(grandchild)
    return newnode

Пример #5

Показать файл

Файл: extracted.py Проект: redsymbol/mobilize

    def process(self, default_idname=None, extra_filters=None, reqinfo=None):
        '''
        Build the wrapper element for the extracted elements and store it

        Works on the list self.elems.  The elements are placed in a
        wrapper whose tag is self.tag: when self.innerhtml is truthy and
        there is exactly one extracted element, the wrapper is a deep copy
        of that element with its tag replaced; otherwise it is a fresh
        HtmlElement to which every extracted element is appended.

        Filters (self.filters followed by extra_filters) are applied
        either to each extracted element before wrapping
        (self.filtermode == FILT_EACHELEM) or to the wrapper afterwards
        (self.filtermode == FILT_COLLAPSED).  A filter exposing a
        "relevant" attribute is only applied when filt.relevant(reqinfo)
        is truthy.

        The wrapper gets self.classvalue as its class, the chosen idname
        as its id, and self.style as its style when that is truthy.  The
        result is stored in self.elem as a side effect.

        @param default_idname : Fallback ID used when self.idname is None;
                                an assertion fails if both are None
        @type  default_idname : str

        @param extra_filters  : Additional filters applied after self.filters
        @type  extra_filters  : list of callable; or None for no extra filters

        @param reqinfo        : Passed unchanged to each filter's "relevant"
                                callable

        @return               : The wrapper element (also stored in self.elem)
        @rtype                : lxml.html.HtmlElement

        '''
        from itertools import chain
        from lxml.html import HtmlElement

        if extra_filters is None:
            extra_filters = []

        def is_relevant(filt):
            # Filters that do not declare relevance always apply.
            if not hasattr(filt, 'relevant'):
                return True
            assert callable(filt.relevant), filt.relevant
            return filt.relevant(reqinfo)

        def run_filters(target):
            # Own filters first, then the caller-supplied ones.
            for filt in chain(self.filters, extra_filters):
                if is_relevant(filt):
                    filt(target)

        assert type(self.elems) is list, self.elems

        # Resolve the id: the instance's own idname wins over the fallback.
        idname = self.idname
        if idname is None:
            assert default_idname is not None, 'cannot determine an idname!'
            idname = default_idname

        if self.filtermode == FILT_EACHELEM:
            # per-element mode: filter every extracted element on its own
            for extracted in self.elems:
                run_filters(extracted)

        # wrap in special mobilize class, id
        if self.innerhtml and len(self.elems) == 1:
            wrapper = copy.deepcopy(self.elems[0])
            wrapper.tag = self.tag
        else:
            wrapper = HtmlElement()
            wrapper.tag = self.tag
            for extracted in self.elems:
                wrapper.append(extracted)

        if self.filtermode == FILT_COLLAPSED:
            # collapsed mode: filter the single wrapper element
            run_filters(wrapper)

        wrapper.attrib['class'] = self.classvalue
        wrapper.attrib['id'] = idname
        if self.style:
            wrapper.attrib['style'] = self.style

        self.elem = wrapper
        return wrapper