Python Element.get примеры использования

Язык программирования: Python

Пространство имен/Пакет: lxml.html

Класс/Тип: Element

Метод/Функция: get

Примеров на hotexamples.com: 3

Python Element.get - 3 примера найдено. Это лучшие примеры Python кода для lxml.html.Element.get, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Element(30)

append(15)

findall(4)

extend(3)

find(3)

getparent(3)

insert(3)

iterchildren(3)

set(3)

get(2)

iterdescendants(2)

remove(2)

cssselect(1)

getchildren(1)

Пример #1

Показать файл

    def view_selection(self, req, resp, url):
        """
        View the highlighted selector (from `action_view`)

        The page in ``resp.body`` is parsed, a ``<base>`` element whose
        ``href`` is the directory of ``url`` is inserted at the top of
        ``<head>``, and the selector given in ``req.GET['selector']`` is
        run against the document.

        Matched elements for which ``self._el_in_head`` is false get an
        ``id`` (their own one if already set) and a dotted red border
        appended to their ``style``; the others get a
        ``DELIVERANCE-MATCH`` attribute instead.  A message rendered from
        the found / not-found template is then inserted as the first
        child of ``<body>``.

        Returns a ``Response`` wrapping the serialized, annotated document.
        """
        from deliverance.selector import Selector
        doc = document_fromstring(resp.body)
        # Make relative links in the copied page resolve against the
        # directory of the original URL.
        el = Element('base')
        el.set('href', posixpath.dirname(url) + '/')
        doc.head.insert(0, el)
        selector = Selector.parse(req.GET['selector'])
        dummy_type, elements, dummy_attributes = selector(doc)
        if not elements:
            template = self._not_found_template
        else:
            template = self._found_template
        all_elements = []
        els_in_head = False
        for index, el in enumerate(elements):
            el_in_head = self._el_in_head(el)
            if el_in_head:
                els_in_head = True
            # Default anchor: 'deliverance-selection' for the first match,
            # 'deliverance-selection-<index>' for the following ones; an
            # existing id on the element takes precedence.
            anchor = 'deliverance-selection'
            if index:
                anchor += '-%s' % index
            if el.get('id'):
                anchor = el.get('id')
            ## FIXME: is a <a name> better?
            if not el_in_head:
                el.set('id', anchor)
            else:
                anchor = None
            ## FIXME: add :target CSS rule
            ## FIXME: or better, some Javascript
            all_elements.append((anchor, el))
            if not el_in_head:
                # Append the highlight border to whatever inline style
                # the element already carries.
                style = el.get('style', '')
                if style:
                    style += '; '
                style += '/* deliverance */ border: 2px dotted #f00'
                el.set('style', style)
            else:
                # Tag the element so mark_deliv_match() can find it in
                # the highlighted source.
                el.set('DELIVERANCE-MATCH', '1')

        def highlight(html_code):
            """Highlights the given code (for use in the template)"""
            if isinstance(html_code, _Element):
                html_code = tostring(html_code)
            return html(
                pygments_highlight(html_code, HtmlLexer(),
                                   HtmlFormatter(noclasses=True)))

        def format_tag(tag):
            """Highlights the lxml HTML tag"""
            # Keep only the text up to the first '>' of the serialized
            # element, i.e. its opening tag.
            return highlight(tostring(tag).split('>')[0] + '>')

        def wrap_html(html, width=100):
            """Breaks lines longer than `width` at tag boundaries"""
            if isinstance(html, _Element):
                html = tostring(html)
            lines = html.splitlines()
            new_lines = []

            def wrap_html_line(line):
                """Returns `line` split into a list of shorter pieces"""
                if len(line) <= width:
                    return [line]
                # Leading text followed by a closing tag: split it off
                # and wrap the remainder.
                match_trail = re.search(r'^[^<]*</.*?>', line, re.S)
                if match_trail:
                    result = [match_trail.group(0)]
                    result.extend(wrap_html_line(line[match_trail.end():]))
                    return result
                # Otherwise peel off the first and the last tag and wrap
                # whatever lies between them.
                match1 = re.search(r'^[^<]*<[^>]*>', line, re.S)
                match2 = re.search(r'<[^>]*>[^<>]*$', line, re.S)
                if not match1 or not match2:
                    return [line]
                result = [match1.group(0)]
                result.extend(wrap_html_line(
                    line[match1.end():match2.start()]))
                result.append(match2.group(0))
                return result

            for line in lines:
                new_lines.extend(wrap_html_line(line))
            return '\n'.join(new_lines)

        def mark_deliv_match(highlighted_text):
            """Wraps highlighted tags carrying DELIVERANCE-MATCH in a <b>"""
            # This call used to pass ``re.S`` as the fourth positional
            # argument of re.sub(), which is ``count``, not ``flags``: the
            # flag never took effect and the number of substitutions was
            # silently capped at 16 (the integer value of re.S).  The cap
            # is removed here.  DOTALL is deliberately left off: with it
            # the lazy ``.*?`` would run from the first ``&lt;`` in the
            # text across lines up to the first marker.
            result = re.sub(
                r'(?:<[^/][^>]*>)*&lt;.*?DELIVERANCE-MATCH=.*?&gt;(?:</[^>]*>)*',
                lambda match: r'<b style="background-color: #ff8">%s</b>' %
                match.group(0), unicode(highlighted_text))
            return html(result)

        text = template.substitute(base_url=url,
                                   els_in_head=els_in_head,
                                   doc=doc,
                                   elements=all_elements,
                                   selector=selector,
                                   format_tag=format_tag,
                                   highlight=highlight,
                                   wrap_html=wrap_html,
                                   mark_deliv_match=mark_deliv_match)
        message = fromstring(
            self._message_template.substitute(message=text, url=url))
        # Text that directly follows <body> must stay after the inserted
        # message, so move it to the message's tail.
        if doc.body.text:
            message.tail = doc.body.text
            doc.body.text = ''
        doc.body.insert(0, message)
        text = tostring(doc)
        return Response(text)

Пример #2

Показать файл

Файл: middleware.py Проект: natea/Deliverance

 def view_selection(self, req, resp, url):
     """
     View the highlighted selector (from `action_view`)

     Parses ``resp.body``, inserts a ``<base>`` element (``href`` set to
     the directory of ``url``) at the top of ``<head>`` and runs the
     selector from ``req.GET['selector']`` against the document.

     Matched elements for which ``self._el_in_head`` is false receive an
     ``id`` (their existing one if set) and a dotted red border in their
     ``style``; the others receive a ``DELIVERANCE-MATCH`` attribute.
     A message rendered from the found / not-found template is inserted
     as the first child of ``<body>``.

     Returns a ``Response`` wrapping the serialized, annotated document.
     """
     from deliverance.selector import Selector
     doc = document_fromstring(resp.body)
     # Relative links in the copied page should resolve against the
     # directory of the original URL.
     el = Element('base')
     el.set('href', posixpath.dirname(url) + '/')
     doc.head.insert(0, el)
     selector = Selector.parse(req.GET['selector'])
     dummy_type, elements, dummy_attributes = selector(doc)
     if not elements:
         template = self._not_found_template
     else:
         template = self._found_template
     all_elements = []
     els_in_head = False
     for index, el in enumerate(elements):
         el_in_head = self._el_in_head(el)
         if el_in_head:
             els_in_head = True
         # 'deliverance-selection' for the first match, with '-<index>'
         # appended for later ones; an existing id wins over both.
         anchor = 'deliverance-selection'
         if index:
             anchor += '-%s' % index
         if el.get('id'):
             anchor = el.get('id')
         ## FIXME: is a <a name> better?
         if not el_in_head:
             el.set('id', anchor)
         else:
             anchor = None
         ## FIXME: add :target CSS rule
         ## FIXME: or better, some Javascript
         all_elements.append((anchor, el))
         if not el_in_head:
             # Extend, rather than replace, any inline style already set.
             style = el.get('style', '')
             if style:
                 style += '; '
             style += '/* deliverance */ border: 2px dotted #f00'
             el.set('style', style)
         else:
             # Marker attribute that mark_deliv_match() searches for.
             el.set('DELIVERANCE-MATCH', '1')
     def highlight(html_code):
         """Highlights the given code (for use in the template)"""
         if isinstance(html_code, _Element):
             html_code = tostring(html_code)
         return html(pygments_highlight(html_code, HtmlLexer(),
                                        HtmlFormatter(noclasses=True)))
     def format_tag(tag):
         """Highlights the lxml HTML tag"""
         # Only the opening tag: everything up to the first '>'.
         return highlight(tostring(tag).split('>')[0]+'>')
     def wrap_html(html, width=100):
         """Breaks lines longer than `width` at tag boundaries"""
         if isinstance(html, _Element):
             html = tostring(html)
         lines = html.splitlines()
         new_lines = []
         def wrap_html_line(line):
             """Returns `line` split into a list of shorter pieces"""
             if len(line) <= width:
                 return [line]
             # Leading text plus a closing tag is split off first.
             match_trail = re.search(r'^[^<]*</.*?>', line, re.S)
             if match_trail:
                 result = [match_trail.group(0)]
                 result.extend(wrap_html_line(line[match_trail.end():]))
                 return result
             # Otherwise split off the first and last tag and recurse on
             # the part in between.
             match1 = re.search(r'^[^<]*<[^>]*>', line, re.S)
             match2 = re.search(r'<[^>]*>[^<>]*$', line, re.S)
             if not match1 or not match2:
                 return [line]
             result = [match1.group(0)]
             result.extend(wrap_html_line(line[match1.end():match2.start()]))
             result.append(match2.group(0))
             return result
         for line in lines:
             new_lines.extend(wrap_html_line(line))
         return '\n'.join(new_lines)
     def mark_deliv_match(highlighted_text):
         """Wraps highlighted tags carrying DELIVERANCE-MATCH in a <b>"""
         # ``re.S`` used to be passed here as the fourth positional
         # argument of re.sub(), i.e. as ``count`` rather than ``flags``.
         # It therefore never enabled DOTALL and only limited the call to
         # 16 substitutions (the integer value of re.S).  That accidental
         # limit is dropped.  DOTALL stays off on purpose: with it the
         # lazy ``.*?`` would run from the first ``&lt;`` in the text
         # across lines up to the first marker.
         result = re.sub(
             r'(?:<[^/][^>]*>)*&lt;.*?DELIVERANCE-MATCH=.*?&gt;(?:</[^>]*>)*',
             lambda match: r'<b style="background-color: #ff8">%s</b>' % match.group(0),
             unicode(highlighted_text))
         return html(result)
     text = template.substitute(
         base_url=url,
         els_in_head=els_in_head, doc=doc,
         elements=all_elements, selector=selector, 
         format_tag=format_tag, highlight=highlight, 
         wrap_html=wrap_html, mark_deliv_match=mark_deliv_match)
     message = fromstring(
         self._message_template.substitute(message=text, url=url))
     # Keep any text that directly followed <body> after the message.
     if doc.body.text:
         message.tail = doc.body.text
         doc.body.text = ''
     doc.body.insert(0, message)
     text = tostring(doc)
     return Response(text)

Пример #3

Показать файл

Файл: html_chunk_tool_cmd_with_class_warning.py Проект: markfawcett/business-papers

def split_and_output(input_root,
                     template_file_name,
                     input_file_name,
                     output_folder=''):
    """Inject the input paragraphs into the template and write HTML files.

    Every element matched by ``//body/div/*`` in ``input_root`` is collected
    under the ``'op'`` label.  For each label with at least one element, a
    copy of the parsed template is filled in: titles are set, the elements
    are appended to the ``content-goes-here`` div, headings get ids and
    permalink anchors, a static table of contents is built, carriage
    returns are stripped, and the result is written to disk.

    Args:
        input_root: root element of the parsed input document.
        template_file_name: path of the HTML template to parse.
        input_file_name: path of the input file; its parent directory is
            the output location when ``output_folder`` is empty.
        output_folder: optional directory to write the output files to.
    """
    template_tree = html.parse(template_file_name)

    # Element lists keyed by file label.  Only 'op' is ever populated here;
    # the other labels stay empty and are skipped below.
    elements_by_label = {'op': [], 'new_fb': [], 'an': []}
    elements_by_label['op'].extend(input_root.xpath('//body/div/*'))

    for label, members in elements_by_label.items():
        if not members:
            continue

        # Work on a private copy of the template for this section.
        section_tree = deepcopy(template_tree)
        section_root = section_tree.getroot()

        # NOTE(review): the title strings are only assigned for the 'op'
        # label; any other populated label would reach the two lines
        # below without them being set for that label.
        if label == 'op':
            title_text = 'Order Paper for ' + DATES.sitting_date_medium
            h1_text = 'Order Paper for ' + DATES.sitting_date_long
        section_root.xpath('//h1[@id="mainTitle"]')[0].text = h1_text
        section_root.xpath('//head/title')[0].text = title_text

        # Position in the template where the input HTML is injected.
        injection_point = section_root.xpath(
            '//div[@id="content-goes-here"]')[0]
        for member in members:
            # Drop the "Future Business" heading at the start of part 2.
            if ('paraChamberSummaryHeading' in member.classes
                    and member.text_content().lower() == 'future business'):
                continue

            if 'DocumentTitle' not in member.classes:
                injection_point.append(member)
                continue

            # Document titles are replaced by a fixed h2 for part 1 or
            # part 2; any other document title is dropped.
            lowered = member.text_content().lower()
            part_heading = Element('h2')
            part_heading.set('class', 'OP-heading-outdent')
            if 'part 1' in lowered:
                part_heading.text = 'Part 1: Business Today'
            elif 'part 2' in lowered:
                part_heading.text = 'Part 2: Future Business'
            else:
                continue
            injection_point.append(part_heading)

        # Add ids and permanent anchors to every heading-like element
        # (added at the request of IDMS).
        heading_xpath = (
            '//h1|//h2|//h3|//h4|//h5|//h6|//*[@class="paraBusinessItemHeading"]'
            '|//*[@class="paraBusinessItemHeading-bulleted"]|//*[@class="FbaLocation"]'
        )
        for position, heading in enumerate(section_root.xpath(heading_xpath)):
            id_text = f'{DATES.sitting_date_compact}-{position}'

            # Preserve a pre-existing id as the name attribute before
            # overwriting it.
            previous_id = heading.get('id')
            if previous_id:
                heading.set('name', previous_id)
            heading.set('id', id_text)

            anchor = SubElement(heading, 'a')
            # Attributes are set in this fixed order.
            anchor_attributes = (
                ('href', '#' + id_text),
                ('aria-label', 'Anchor'),
                ('title', 'Permalink for ' + heading.text_content()),
                ('data-anchor-icon', '§'),
                ('class', 'anchor-link'),
            )
            for attr_name, attr_value in anchor_attributes:
                anchor.set(attr_name, attr_value)

        # Static table of contents.  tocbot overrides it at runtime, but a
        # ToC should exist even when JavaScript is disabled.
        toc_hosts = section_root.xpath('//nav[@id="toc"][1]')
        toc_headings = section_root.xpath(
            '//*[contains(@class, "js-toc-content")]//h2')

        if toc_hosts:
            toc_list = SubElement(toc_hosts[0], 'ol')
            toc_list.set('class', 'toc-list')
            for toc_heading in toc_headings:
                item = SubElement(toc_list, 'li')
                item.set('class', 'toc-list-item')

                link = SubElement(item, 'a')
                link.set('href', '#' + toc_heading.get('id', ''))
                link.set('class', 'toc-link')
                link.text = toc_heading.text_content()
        else:
            print('no element')

        # Strip carriage returns from the tail and text of every node.
        for node in section_root.iter():
            if node.tail:
                node.tail = node.tail.replace('\r', '')
            if node.text:
                node.text = node.text.replace('\r', '')

        # Write the output next to the input unless a folder was given.
        outputfile_name = f'{label}{DATES.sitting_date_compact[2:]}{fileextension}'
        target_dir = (Path(output_folder) if output_folder
                      else Path(input_file_name).parent)
        outputfile_path = target_dir.joinpath(outputfile_name)

        section_tree.write(str(outputfile_path),
                           doctype=DOCTYPE,
                           encoding='UTF-8',
                           method="html",
                           xml_declaration=False)
        print(f'{label} file is at:\t{outputfile_path}')