Python BeautifulSoup.contents примеры, bs4.BeautifulSoup.contents Python примеры использования

Пример #1

0

Показать файл

def add_link_markup(tag):
    """Decorate the first ``<a href=...>`` in ``tag`` and return its markup.

    ``tag`` is parsed with ``html.parser``; only the first anchor that has
    an ``href`` attribute is considered.

    * An ``href`` starting with ``/external-site/?`` gets the
      ``external-link`` icon and, when it carries an ``ext_url`` argument,
      is replaced by ``signed_redirect`` of that URL.
    * An ``href`` matching ``NON_CFPB_LINKS`` gets the ``external-link``
      icon and is passed through ``signed_redirect`` when it also matches
      ``NON_GOV_LINKS``.
    * An ``href`` matching ``DOWNLOAD_LINKS`` gets the ``download`` icon.

    If the anchor contains anything selected by
    ``ICONLESS_LINK_CHILD_ELEMENTS`` it is returned as-is at that point:
    no icon is added, although the ``href`` may already have been rewritten.

    Returns the anchor serialized with ``str()``; ``None`` when no anchor
    was found, or when the anchor has no iconless child and no icon applies.
    """
    link = BeautifulSoup(tag, 'html.parser').find('a', href=True)
    if link is None:
        return None

    # Guarantee a list-valued class attribute so a class can be appended.
    if not link.attrs.get('class'):
        link.attrs['class'] = []

    href = link['href']
    icon = None

    if href.startswith('/external-site/?'):
        # Already routed through the leaving-the-site notice.
        icon = 'external-link'
        query = parse_qs(urlparse(href).query)
        if 'ext_url' in query:
            link['href'] = signed_redirect(query['ext_url'][0])
    elif NON_CFPB_LINKS.match(href):
        icon = 'external-link'
        if NON_GOV_LINKS.match(href):
            link['href'] = signed_redirect(href)
    elif DOWNLOAD_LINKS.search(href):
        icon = 'download'

    # Links wrapping one of the listed child elements never get an icon,
    # but they are still returned because the href may have changed above.
    iconless_selector = ', '.join(ICONLESS_LINK_CHILD_ELEMENTS)
    if link.select(iconless_selector):
        return str(link)

    if not icon:
        return None

    link.attrs['class'].append(LINK_ICON_CLASSES)

    # Move the existing children into a span (it provides the underline),
    # then follow it with a space and the SVG icon.
    text_span = BeautifulSoup('', 'html.parser').new_tag('span')
    text_span['class'] = LINK_ICON_TEXT_CLASSES
    text_span.contents = link.contents
    link.contents = [text_span, NavigableString(' ')]
    link.contents.append(BeautifulSoup(svg_icon(icon), 'html.parser'))
    return str(link)

Пример #2

0

Показать файл

def add_link_markup(tags):
    """Rewrite and decorate each link tag in ``tags`` in place.

    For every tag (each is expected to carry an ``href``):

    * ``/external-site/?`` links that have an ``ext_url`` argument are
      replaced by ``signed_redirect`` of that URL; no class is added.
    * Links matching ``NONCFPB_LINK_PATTERN`` receive ``EXTERNAL_A_CSS``
      and, if they also match ``EXTERNAL_LINK_PATTERN``, a signed redirect.
    * Links matching ``DOWNLOAD_LINK_PATTERN`` receive ``DOWNLOAD_A_CSS``.

    Tags that received a class get their children wrapped in a span;
    all other tags are passed to ``fix_link`` unless their (possibly
    rewritten) ``href`` matches ``FILES_LINK_PATTERN``.  Returns ``None``.
    """
    for link in tags:
        # Guarantee a list-valued class attribute before appending to it.
        if not link.attrs.get('class'):
            link.attrs['class'] = []

        href = link['href']
        needs_span = False

        if href.startswith('/external-site/?'):
            query = parse_qs(urlparse(href).query)
            if 'ext_url' in query:
                link['href'] = signed_redirect(query['ext_url'][0])
        elif NONCFPB_LINK_PATTERN.match(href):
            # Leaving consumerfinance.gov: mark as external.
            link.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                link['href'] = signed_redirect(href)
            needs_span = True
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # File download: mark accordingly.
            link.attrs['class'].append(DOWNLOAD_A_CSS)
            needs_span = True

        if not needs_span:
            # Re-read the href: the external-site branch may have changed it.
            if not FILES_LINK_PATTERN.match(link['href']):
                fix_link(link)
            continue

        # Wrap the link text in a span that provides the underline.
        wrapper = BeautifulSoup('', 'html.parser').new_tag('span')
        wrapper['class'] = EXTERNAL_SPAN_CSS
        wrapper.contents = link.contents
        link.contents = [wrapper, NavigableString(' ')]

Пример #3

0

Показать файл

def add_link_markup(tags):
    """Add external/download markup to each link tag in ``tags`` in place.

    For every tag (each is expected to carry an ``href``):

    * Links matching ``NONCFPB_LINK_PATTERN`` receive ``EXTERNAL_A_CSS``;
      if they also match ``EXTERNAL_LINK_PATTERN`` the ``href`` is prefixed
      with ``/external-site/?ext_url=``.
    * Otherwise, links matching ``DOWNLOAD_LINK_PATTERN`` receive
      ``DOWNLOAD_A_CSS``.

    Tags that received a class get their children wrapped in a span
    followed by a single space; all other tags are passed to ``fix_link``
    unless their ``href`` matches ``FILES_LINK_PATTERN``.  Returns ``None``.
    """
    for tag in tags:
        added_icon = False
        # Guarantee a list-valued class attribute before appending to it.
        if not tag.attrs.get('class'):
            tag.attrs['class'] = []
        href = tag['href']
        if NONCFPB_LINK_PATTERN.match(href):
            # Sets the icon to indicate you're leaving consumerfinance.gov
            tag.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                # Sets the link to an external one if you're leaving .gov
                # NOTE(review): the target URL is concatenated without
                # percent-encoding, so an href containing '&' or '#' would
                # not survive as a single ext_url value -- confirm whether
                # the receiving view relies on the raw form before changing.
                tag['href'] = '/external-site/?ext_url=' + href
            added_icon = True
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # Sets the icon to indicate you're downloading a file
            tag.attrs['class'].append(DOWNLOAD_A_CSS)
            added_icon = True
        if added_icon:
            # Wraps the link text in a span that provides the underline
            contents = tag.contents
            # Name the parser explicitly: without it bs4 guesses a parser,
            # which varies with what is installed and triggers a warning.
            span = BeautifulSoup('', 'html.parser').new_tag('span')
            span['class'] = EXTERNAL_SPAN_CSS
            span.contents = contents
            tag.contents = [span, NavigableString(' ')]
        elif not FILES_LINK_PATTERN.match(tag['href']):
            fix_link(tag)

Пример #4

0

Показать файл

Файл: __init__.py Проект: amymok/cfgov-refresh

def add_link_markup(tags):
    """Apply redirect, CSS-class and span markup to every tag in ``tags``.

    Each tag is mutated in place and is expected to have an ``href``:

    * ``href`` starting with ``/external-site/?``: when the query string
      contains ``ext_url``, the ``href`` becomes ``signed_redirect`` of its
      first value.  No class is added in this branch.
    * ``href`` matching ``NONCFPB_LINK_PATTERN``: ``EXTERNAL_A_CSS`` is
      appended to the classes, and the ``href`` is replaced by a signed
      redirect when it also matches ``EXTERNAL_LINK_PATTERN``.
    * ``href`` matching ``DOWNLOAD_LINK_PATTERN``: ``DOWNLOAD_A_CSS`` is
      appended to the classes.

    A tag that gained a class has its children moved into a span; any
    other tag whose current ``href`` does not match ``FILES_LINK_PATTERN``
    is handed to ``fix_link``.  Nothing is returned.
    """
    for anchor in tags:
        classes = anchor.attrs.get('class', None)
        if not classes:
            # Start from an empty class list so append() below is safe.
            anchor.attrs['class'] = []

        target = anchor['href']
        decorated = True

        if target.startswith('/external-site/?'):
            decorated = False
            params = parse_qs(urlparse(target).query)
            if 'ext_url' in params:
                anchor['href'] = signed_redirect(params['ext_url'][0])
        elif NONCFPB_LINK_PATTERN.match(target):
            # Indicates the link leaves consumerfinance.gov.
            anchor.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(target):
                anchor['href'] = signed_redirect(target)
        elif DOWNLOAD_LINK_PATTERN.search(target):
            # Indicates the link downloads a file.
            anchor.attrs['class'].append(DOWNLOAD_A_CSS)
        else:
            decorated = False

        if decorated:
            # The span around the link text provides the underline.
            underline = BeautifulSoup('', 'html.parser').new_tag('span')
            underline['class'] = EXTERNAL_SPAN_CSS
            underline.contents = anchor.contents
            anchor.contents = [underline, NavigableString(' ')]
        elif not FILES_LINK_PATTERN.match(anchor['href']):
            fix_link(anchor)

Пример #5

0

Показать файл

Файл: __init__.py Проект: Newman101/cfgov-refresh

def add_link_markup(tags):
    """Mark external and download links among ``tags``, mutating in place.

    Each tag is expected to have an ``href``:

    * ``href`` matching ``NONCFPB_LINK_PATTERN``: ``EXTERNAL_A_CSS`` is
      appended to the classes; when it also matches
      ``EXTERNAL_LINK_PATTERN`` the ``href`` is prefixed with
      ``/external-site/?ext_url=``.
    * otherwise, ``href`` matching ``DOWNLOAD_LINK_PATTERN``:
      ``DOWNLOAD_A_CSS`` is appended to the classes.

    A tag that gained a class has its children moved into a span followed
    by a single space; any other tag whose ``href`` does not match
    ``FILES_LINK_PATTERN`` is handed to ``fix_link``.  Returns ``None``.
    """
    for link in tags:
        # Start from an empty class list so append() below is safe.
        if not link.attrs.get('class'):
            link.attrs['class'] = []

        href = link['href']

        if NONCFPB_LINK_PATTERN.match(href):
            # Indicates the link leaves consumerfinance.gov.
            link.attrs['class'].append(EXTERNAL_A_CSS)
            if EXTERNAL_LINK_PATTERN.match(href):
                # Leaving .gov entirely: go through the external-site page.
                link['href'] = '/external-site/?ext_url=' + href
        elif DOWNLOAD_LINK_PATTERN.search(href):
            # Indicates the link downloads a file.
            link.attrs['class'].append(DOWNLOAD_A_CSS)
        else:
            # Neither external nor a download: no span is added.
            if not FILES_LINK_PATTERN.match(link['href']):
                fix_link(link)
            continue

        # The span around the link text provides the underline.
        underline = BeautifulSoup('', 'html.parser').new_tag('span')
        underline['class'] = EXTERNAL_SPAN_CSS
        underline.contents = link.contents
        link.contents = [underline, NavigableString(' ')]

Пример #6

0

Показать файл

Файл: simple_json.py Проект: kenanbit/pocket-ncurses

def plain_content(readability_content, content_digests, node_indexes):
    """Return ``readability_content`` re-serialized from its plain elements.

    The markup is parsed with ``html.parser``, its top-level nodes are run
    through ``plain_elements`` (which also receives ``content_digests`` and
    ``node_indexes``), and, when ``node_indexes`` is truthy, each resulting
    element is additionally passed through ``add_node_indexes``.  The
    results replace the document's top-level contents, and the document is
    returned as a string.
    """
    document = BeautifulSoup(readability_content, 'html.parser')
    nodes = plain_elements(document.contents, content_digests, node_indexes)
    # Node-index attributes are only added on request.
    document.contents = (
        list(map(add_node_indexes, nodes)) if node_indexes else nodes
    )
    return str(document)

Пример #7

0

Показать файл

def add_link_markup(tag):
    """Add icon markup to the link ``tag`` and return it as a string.

    ``tag`` is mutated in place and is expected to carry an ``href``:

    * ``href`` starting with ``/external-site/?``: ``external-link`` icon;
      when the query string has ``ext_url`` the ``href`` is replaced by
      ``signed_redirect`` of its first value.
    * ``href`` matching ``NON_CFPB_LINKS``: ``external-link`` icon; the
      ``href`` is passed through ``signed_redirect`` when it also matches
      ``NON_GOV_LINKS``.
    * ``href`` matching ``DOWNLOAD_LINKS``: ``download`` icon.

    Returns ``str(tag)`` when an icon was added, otherwise ``None``.
    """
    # Guarantee a list-valued class attribute so a class can be appended.
    if not tag.attrs.get('class'):
        tag.attrs['class'] = []

    href = tag['href']

    if href.startswith('/external-site/?'):
        # Already pointing at the leaving-the-site notice.
        icon = 'external-link'
        query = parse_qs(urlparse(href).query)
        if 'ext_url' in query:
            tag['href'] = signed_redirect(query['ext_url'][0])
    elif NON_CFPB_LINKS.match(href):
        # Leaving consumerfinance.gov.
        icon = 'external-link'
        if NON_GOV_LINKS.match(href):
            tag['href'] = signed_redirect(href)
    elif DOWNLOAD_LINKS.search(href):
        # Downloading a file.
        icon = 'download'
    else:
        # None of the link patterns matched: nothing to add.
        return None

    tag.attrs['class'].append(LINK_ICON_CLASSES)

    # Move the existing children into a span (it provides the underline),
    # then follow it with a space and the SVG icon.
    text_span = BeautifulSoup('', 'html.parser').new_tag('span')
    text_span['class'] = LINK_ICON_TEXT_CLASSES
    text_span.contents = tag.contents
    tag.contents = [text_span, NavigableString(' ')]
    tag.contents.append(BeautifulSoup(svg_icon(icon), 'html.parser'))
    return str(tag)

Пример #8

0

Показать файл

Файл: utils.py Проект: contolini/cfgov-refresh

def add_link_markup(tag):
    """Decorate the link ``tag`` with an icon; return its markup if changed.

    The tag is modified in place and must have an ``href`` attribute.

    An ``external-link`` icon is chosen for ``/external-site/?`` links and
    for links matching ``NON_CFPB_LINKS``; a ``download`` icon is chosen
    for links matching ``DOWNLOAD_LINKS``.  ``/external-site/?`` links that
    carry ``ext_url`` and ``NON_CFPB_LINKS`` links that also match
    ``NON_GOV_LINKS`` have their ``href`` replaced by ``signed_redirect``.

    Returns the serialized tag when an icon was added and ``None``
    otherwise.
    """
    existing_classes = tag.attrs.get('class', None)
    if not existing_classes:
        # An empty list lets the icon class be appended later.
        tag.attrs['class'] = []

    target = tag['href']
    icon = None

    if target.startswith('/external-site/?'):
        icon = 'external-link'
        params = parse_qs(urlparse(target).query)
        if 'ext_url' in params:
            # Re-sign the redirect for the embedded external URL.
            tag['href'] = signed_redirect(params['ext_url'][0])
    elif NON_CFPB_LINKS.match(target):
        icon = 'external-link'
        if NON_GOV_LINKS.match(target):
            # Non-gov destinations also get the redirect notice.
            tag['href'] = signed_redirect(target)
    elif DOWNLOAD_LINKS.search(target):
        icon = 'download'

    if not icon:
        return None

    tag.attrs['class'].append(LINK_ICON_CLASSES)

    # The span around the original children provides the underline.
    underline = BeautifulSoup('', 'html.parser').new_tag('span')
    underline['class'] = LINK_ICON_TEXT_CLASSES
    underline.contents = tag.contents
    tag.contents = [underline, NavigableString(' ')]

    # The SVG icon goes last, after the separating space.
    svg = BeautifulSoup(svg_icon(icon), 'html.parser')
    tag.contents.append(svg)
    return str(tag)

Пример #9

0

Показать файл

def render_table_odt(elem, doc):
    """Render a table element as OpenDocument XML and register its styles.

    Builds, in order: a caption paragraph, a ``table:table`` element holding
    the column definitions and one ``table:table-row`` per row of
    ``elem.content[0]``, and -- when ``elem.content[1].content[0]`` exists --
    one legend paragraph per footer definition item.  Column and cell
    ``style:style`` definitions are collected along the way and appended,
    prettified, to ``doc.auto_styles``.

    Args:
        elem: Element whose ``content[0]`` is the table (attributes read
            here: ``caption``, ``width``, ``total_width``, ``content``) and
            whose optional ``content[1]`` holds the footer.
        doc: Document object; passed to ``utils.get_elem_count`` and
            extended through ``doc.auto_styles.append``.

    Returns:
        str: The caption, table and legend elements, each serialized with
        ``str()`` and joined with newlines.
    """
    table = elem.content[0]
    table_number = tuple(
        str(i) for i in utils.get_elem_count(doc, pf.Table, register="table"))
    table_name = "Table{}".format("_".join(str(i) for i in table_number))

    # Container for the top-level output elements (caption, table, legend).
    table_root = BeautifulSoup("", "xml")

    if hasattr(table, "caption") and table.caption:
        colon = ": "
        caption = "".join(pf.stringify(c) for c in table.caption)
    else:
        colon = ""
        caption = ""

    # Caption paragraph: bold "Table <sequence number>[: ]" then the text.
    caption_odt = utils.create_nested_tags(
        **{
            "name":
            "text:p",
            "attrs": {
                "text:style-name": "Table"
            },
            "contents": [
                {
                    "name":
                    "text:span",
                    "attrs": {
                        "text:style-name": "Strong_20_Emphasis"
                    },
                    "contents": [
                        "Table ",
                        {
                            "name": "text:sequence",
                            "attrs": {
                                "text:ref-name": f"ref{table_name}",
                                "text:name": "Table",
                                "text:formula": "ooow:Table+1",
                                "style:num-format": "1",
                            },
                            "contents": [".".join(table_number)],
                        },
                        colon,
                    ],
                },
                caption,
            ],
        })

    table_root.contents.append(caption_odt)

    table_odt = utils.create_nested_tags(
        **{
            "name": "table:table",
            "attrs": {
                "table:name": table_name,
                "table:style-name": table_name,
                "table:template-name": "Default Style",
            },
        })

    table_root.contents.append(table_odt)

    # Columns with a falsy width share whatever fraction is left over.
    unoccupied_width = 1 - sum(table.width)
    unspecified_widths = len([w for w in table.width if not w])
    # When every column has an explicit width there is nothing to share out;
    # guard the division so it cannot raise ZeroDivisionError (the value is
    # unused in that case because no width is falsy).
    remaining_for_each = (unoccupied_width / unspecified_widths
                          if unspecified_widths else 0)

    widths = [w if w else remaining_for_each for w in table.width]

    # We want the table to occupy a maximum width
    widths = [w * table.total_width for w in widths]

    column_style_names, column_styles, column_definitions = zip(
        *create_column_definitions(widths, table_name))

    pf.debug(column_style_names, column_styles, column_definitions)

    # Accumulates every style definition: column styles first, then the
    # cell styles of each row.
    styles = BeautifulSoup("", "xml")
    styles.contents = list(column_styles)

    table_odt.contents.extend(column_definitions)

    # Matches the paragraph tags inside converted cell content; compiled
    # once here instead of once per cell.
    text_p = re.compile("text:p")

    for r, row in enumerate(table.content):
        row_odt = Tag(name="table:table-row")
        row_odt.attrs = {
            "table:style-name":
            "{table_name}.{r}".format(table_name=table_name, r=r + 1)
        }

        # One entry per cell position; None marks a covered cell so that
        # indexes stay aligned with the row's cells.
        row_cell_styles = []

        for c, cell in enumerate(row.content):

            if cell.covered:
                cell_odt = Tag(name="table:covered-table-cell")
                row_odt.contents.append(cell_odt)

                row_cell_styles.append(None)
            else:
                cell_odt = Tag(name="table:table-cell")

                cell_style_name = "{column_style}{r}".format(
                    column_style=column_style_names[c], r=r + 1)

                cell_style = Tag(name="style:style")
                cell_style.attrs = {
                    "style:name": cell_style_name,
                    "style:family": "table-cell",
                    "style:writing-mode": "page",
                }
                style_cell_properies = Tag(name="style:table-cell-properties")
                style_cell_properies.attrs = {
                    "fo:padding-left": "0.10cm",
                    "fo:padding-right": "0.10cm",
                    "fo:padding-top": "0.10cm",
                    "fo:padding-bottom": "0.10cm",
                    "style:vertical-align": "bottom",
                }
                style_background_image = Tag(name="style:background-image")
                style_cell_properies.contents.append(style_background_image)
                # The properties tag becomes cell_style.contents[0], which
                # the underline and padding adjustments below rely on.
                cell_style.contents.append(style_cell_properies)

                row_cell_styles.append(cell_style)

                cell_odt.attrs = {
                    "table:style-name": cell_style_name,
                    "office:value-type": "string",
                }

                if cell.col_span > 1:
                    cell_odt.attrs[
                        "table:number-columns-spanned"] = cell.col_span

                if cell.content:
                    cell_content = utils.panflute2output(
                        cell.content, format="opendocument").strip()

                    # Parse the converted fragment and keep only what ends
                    # up under <body>.
                    cell_content = BeautifulSoup(cell_content,
                                                 "lxml").html.body

                    for t in cell_content.find_all(text_p):
                        if cell.heading == 1:
                            t["text:style-name"] = "Table_20_Heading"
                        elif cell.heading == 2:
                            t["text:style-name"] = "Table_20_Subheading"
                        else:
                            t["text:style-name"] = "Table_20_Contents"

                        if cell.vertical:
                            # Wrap the paragraph's children in a span that
                            # uses the "Vertical" text style.
                            t_contents = t.contents
                            t.contents = [
                                utils.create_nested_tags(
                                    **{
                                        "name": "text:span",
                                        "attrs": {
                                            "text:style-name": "Vertical"
                                        },
                                        "contents": t_contents,
                                    })
                            ]
                    cell_odt.contents = cell_content.contents
                else:
                    # Empty cell: emit a single empty paragraph.
                    # NOTE(review): "Table_20_contents" (lower-case "c")
                    # differs from "Table_20_Contents" used for non-empty
                    # cells above -- confirm which style name is intended.
                    cell_content = Tag(name="text:p")
                    cell_content.attrs = {
                        "text:style-name": "Table_20_contents"
                    }
                    cell_odt.contents.append(cell_content)

                row_odt.contents.append(cell_odt)

        if row.underlines:
            for underline in row.underlines:
                start = underline[0]
                stop = underline[1]

                # Presumably (start, stop) is a 1-based inclusive column
                # range -- the loop visits indexes start-1 .. stop-1.
                for i in range(start - 1, stop):
                    cell_style = row_cell_styles[i]

                    if cell_style is None:
                        pass
                    else:
                        cell_style.contents[0].attrs[
                            "fo:border-bottom"] = "0.5pt solid #000000"

        # The previous row's btm_space is applied as extra top padding on
        # this row; the first row has no predecessor.
        add_top_space = table.content[r - 1].btm_space if r else False

        if row.top_space or add_top_space:
            for cell_style in row_cell_styles:
                if cell_style is not None:
                    padding_top = cell_style.contents[0].attrs[
                        "fo:padding-top"]

                    # 0.05cm is added per unit of each of the two values.
                    padding_top = (float(padding_top.strip("cm")) +
                                   0.05 * add_top_space + 0.05 * row.top_space)

                    cell_style.contents[0].attrs[
                        "fo:padding-top"] = f"{padding_top}cm"

        row_cell_styles = [cs for cs in row_cell_styles if cs is not None]
        styles.contents.extend(row_cell_styles)

        table_odt.contents.append(row_odt)

    # The footer is optional: a missing index simply means no legend.
    try:
        footer = elem.content[1].content[0]
    except IndexError:
        footer = None

    if footer is not None:
        for definition_item in footer.content:
            term = "".join(pf.stringify(e) for e in definition_item.term)

            definitions = [
                utils.panflute2output(d.content, format="opendocument")
                for d in definition_item.definitions
            ]
            definitions_parsed = BeautifulSoup("".join(definitions),
                                               "lxml").html.body.contents

            # Turn each definition paragraph into an inline span prefixed
            # with a space so it can follow the term on the same line.
            for t in definitions_parsed:
                if t.name == "text:p":
                    t.name = "text:span"
                    t.contents.insert(0, NavigableString(" "))

            # Legend line: superscript term followed by its definitions.
            definition = utils.create_nested_tags(
                **{
                    "name":
                    "text:p",
                    "attrs": {
                        "text:style-name": "Table_20_Legend"
                    },
                    "contents": [{
                        "name": "text:span",
                        "attrs": {
                            "text:style-name": "Superscript"
                        },
                        "contents": [term],
                    }] + definitions_parsed,
                })
            table_root.contents.append(definition)

    styles = "\n".join(c.prettify() for c in styles.contents)
    doc.auto_styles.append(styles)

    table = "\n".join(str(c) for c in table_root.contents)
    # pf.debug(table)

    return table

Python BeautifulSoup.contents примеры использования