Exemplo n.º 1
0
    def test_table_item_markdown(self, processing_options):
        """A TableItem holding a single TextItem renders that item's text as its markdown."""
        cell = TableItem(
            processing_options,
            [TextItem(processing_options, 'Column 1')],
        )

        assert cell.markdown() == 'Column 1'
Exemplo n.º 2
0
    def test_html_output(self, processing_options):
        """A TableRow built from two single-text cells renders each cell as a <td> element."""
        cells = [
            TableItem(processing_options, [TextItem(processing_options, text)])
            for text in ('Column 1', 'Column 2')
        ]
        row = TableRow(processing_options, cells)

        # NOTE(review): the expected markup opens with two <tr> tags but closes only one -
        # confirm this is the intended TableRow.html() output.
        assert row.html() == "<tr><tr><td>Column 1</td><td>Column 2</td></tr>"
Exemplo n.º 3
0
def extract_from_nimbus_table_select_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table select item.
    Every <span> with the class "select-label-text" inside the cell contributes its text. The texts are joined with
    single spaces and surrounding whitespace is stripped from the result.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to inspect
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Select text as a TextItem in a TableItem wrapper. None when cell_tag is not a <td> tag or contains no
        "select-label-text" spans

    """
    if cell_tag.name != 'td':
        return None

    label_spans = cell_tag.find_all('span', class_="select-label-text")
    if not label_spans:
        return None

    # join() never leaves a trailing separator; strip() still trims whitespace that belongs to the
    # first/last label text itself.
    label_text = ' '.join(span.text for span in label_spans).strip()

    text_item = TextItem(processing_options, label_text)
    return TableItem(processing_options, [text_item])
Exemplo n.º 4
0
def extract_from_nimbus_table_collaboration_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table collaboration item.
    The first <span> with the class "collaborate-item" inside the cell is used; the value of its
    "data-mention-name" attribute is handed to a TableCollaborator.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to inspect
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        TableCollaborator in a TableItem wrapper. None when cell_tag is not a <td> tag or contains no
        "collaborate-item" span

    """
    if cell_tag.name != 'td':
        return None

    collaborator_span = cell_tag.find('span', class_="collaborate-item")
    if not collaborator_span:
        return None

    # Indexing the tag reads the attribute; a span without "data-mention-name" raises here.
    collaborator_name = collaborator_span["data-mention-name"]
    collaborator = TableCollaborator(processing_options, collaborator_name)
    return TableItem(processing_options, [collaborator])
Exemplo n.º 5
0
def extract_from_nimbus_table_date_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table date item.
    The first <span> with the class "input-date-text" inside the cell supplies the date text, which is used as is.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to inspect
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Date text as a TextItem in a TableItem wrapper. None when cell_tag is not a <td> tag or contains no
        "input-date-text" span

    """
    if cell_tag.name != 'td':
        return None

    date_span = cell_tag.find('span', class_="input-date-text")
    if not date_span:
        return None

    date_item = TextItem(processing_options, date_span.text)
    return TableItem(processing_options, [date_item])
Exemplo n.º 6
0
def extract_from_nimbus_table_hyperlink_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table hyperlink item.
    Only the first <a> tag found inside the cell is converted; any further links in the same cell are ignored.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to inspect
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Result of html_data_extractors.extract_from_hyperlink for the first link, in a TableItem wrapper.
        None when cell_tag is not a <td> tag or contains no <a> tag

    """
    if cell_tag.name != 'td':
        return None

    links = cell_tag.find_all('a')
    if not links:
        return None

    first_link = links[0]
    hyperlink = html_data_extractors.extract_from_hyperlink(
        first_link, processing_options)
    return TableItem(processing_options, [hyperlink])
Exemplo n.º 7
0
def extract_from_nimbus_table_rating_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table rating item.
    Each <span> with the class "rating-active" inside the cell counts as one active star. The count is rendered
    as the text "Rating <count>/5 stars".

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to inspect
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Rating text as a TextItem in a TableItem wrapper. None when cell_tag is not a <td> tag or contains no
        "rating-active" spans

    """
    if cell_tag.name != 'td':
        return None

    lit_stars = cell_tag.find_all('span', class_="rating-active")
    if not lit_stars:
        return None

    star_count = len(lit_stars)
    rating_item = TextItem(processing_options, f'Rating {star_count}/5 stars')
    return TableItem(processing_options, [rating_item])
Exemplo n.º 8
0
def extract_from_nimbus_table_progress_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table progress item.
    The first <span> with the class "progress-value" inside the cell supplies the progress value, which is
    rendered as the text "Progress <value>".

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to inspect
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Progress text as a TextItem in a TableItem wrapper. None when cell_tag is not a <td> tag or contains no
        "progress-value" span

    """
    if cell_tag.name != 'td':
        return None

    value_span = cell_tag.find('span', class_="progress-value")
    if not value_span:
        return None

    progress_item = TextItem(processing_options, f'Progress {value_span.text}')
    return TableItem(processing_options, [progress_item])
Exemplo n.º 9
0
    def test_table_markdown(self, processing_options):
        """A Table with a header and one data row renders as a markdown pipe table."""
        header_cells = [
            TableItem(processing_options, [TextItem(processing_options, text)])
            for text in ('Column 1', 'Column 2')
        ]
        header = TableHeader(processing_options, header_cells)

        body_cells = [
            TableItem(processing_options, [TextItem(processing_options, text)])
            for text in ('Row 1', 'Row 2')
        ]
        body_row = TableRow(processing_options, body_cells)

        table = Table(processing_options, [header, body_row])

        assert table.markdown() == '\n|Column 1|Column 2|\n|--|--|\n|Row 1|Row 2|\n\n'
Exemplo n.º 10
0
    def test_table_html(self, processing_options):
        """A Table with a header and one data row renders as a bordered html table."""
        header_cells = [
            TableItem(processing_options, [TextItem(processing_options, text)])
            for text in ('Column 1', 'Column 2')
        ]
        header = TableHeader(processing_options, header_cells)

        body_cells = [
            TableItem(processing_options, [TextItem(processing_options, text)])
            for text in ('Row 1', 'Row 2')
        ]
        body_row = TableRow(processing_options, body_cells)

        table = Table(processing_options, [header, body_row])

        # NOTE(review): each row in the expected markup opens with two <tr> tags but closes only one -
        # confirm this is the intended html output.
        assert table.html() == '<table border="1"><tr><tr><th>Column 1</th><th>Column 2</th></tr><tr><tr><td>Row 1</td><td>Row 2</td></tr></table>'
Exemplo n.º 11
0
def extract_from_123abc_table_header_row(cells, processing_options):
    """
    Extract the ABC header row of a nimbus html table, for use when the 123 and ABC row/column headers are kept.

    A cell whose class list contains "table-head-start" is replaced by an empty TableItem and the cell directly
    after it is dropped.  For every other cell the <div class="item-title"> found inside it is passed to
    process_child_items and the result is wrapped in a TableItem.

    Parameters
    ----------
    cells : list[Tag]
        list of <th> tags from a nimbus html table.  No check is made that the provided tags are <th> tags
    processing_options : NimbusProcessingOptions
        processing settings for the current conversion

    Returns
    -------
    list
        list of TableItem objects
    """
    header_items = []
    remaining_cells = iter(cells)

    for cell in remaining_cells:
        if 'table-head-start' in (cell.get('class') or ()):
            # append an empty cell where numbers and letters row/columns meet
            header_items.append(
                TableItem(processing_options,
                          [TextItem(processing_options, '')]))

            # consume (and ignore) the cell that follows the corner cell
            next(remaining_cells, None)
            continue

        title_div = cell.find('div', class_="item-title")
        title_items = process_child_items(title_div, processing_options)
        header_items.append(TableItem(processing_options, title_items))

    return header_items
Exemplo n.º 12
0
def extract_from_nimbus_table_mention_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table mention item.
    Only cells whose class list contains "cell-mention" are handled.  The first tag inside the cell that carries a
    "data-mention-type" attribute is passed to extract_from_nimbus_mention_span.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to inspect
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Result of extract_from_nimbus_mention_span in a TableItem wrapper.  None when cell_tag is not a <td> tag,
        is not a "cell-mention" cell, or contains no tag with a "data-mention-type" attribute
    """
    if cell_tag.name != 'td':
        return None

    css_classes = cell_tag.get('class')
    if not css_classes or 'cell-mention' not in css_classes:
        return None

    # NOTE a predicate is handed to find() so the matching tag is located directly, without looping over
    # every span in the cell by hand.
    mention_tag = cell_tag.find(lambda tag: tag.has_attr('data-mention-type'))
    if not mention_tag:
        return None

    mention_item = extract_from_nimbus_mention_span(
        mention_tag, processing_options)
    return TableItem(processing_options, [mention_item])
Exemplo n.º 13
0
def _extract_special_table_cell(cell, processing_options):
    """Return the first truthy result of the specialised nimbus cell extractors, or None if none matches."""
    # Order matters: the first extractor that recognises the cell wins.
    extractors = (
        extract_from_nimbus_table_check_item,
        extract_from_nimbus_table_select_item,
        extract_from_nimbus_table_mention_item,
        extract_from_nimbus_table_collaboration_item,
        extract_from_nimbus_table_date_item,
        extract_from_nimbus_table_hyperlink_item,
        extract_from_nimbus_table_rating_item,
        extract_from_nimbus_table_progress_item,
    )
    for extractor in extractors:
        item = extractor(cell, processing_options)
        if item:
            return item

    return None


def extract_from_table_row(cells, processing_options):
    """
    Convert the cells of one nimbus html table row into a list of table items.

    Cells are handled in order:

    - a cell whose class list contains "table-head-item" causes the cell after it to be skipped; the head item
      itself is dropped as well unless processing_options.keep_abc_123_columns is truthy
    - a cell whose class list contains "add-row" ends processing of the row
    - otherwise the specialised extractors (check, select, mention, collaboration, date, hyperlink, rating,
      progress) are tried in that order and the first truthy result is used
    - failing that, the result of extract_from_nimbus_table_text_item is wrapped in a TableItem
    - failing that, a TableItem holding an empty TextItem is used

    Parameters
    ----------
    cells : iterable of beautiful soup tag objects
        cells of a single table row
    processing_options : NimbusProcessingOptions
        processing settings for the current conversion; keep_abc_123_columns is read from it

    Returns
    -------
    list
        one item per cell that was not skipped
    """
    keep_abc_123_columns = processing_options.keep_abc_123_columns
    row_items = []
    skip_following_cell = False

    for cell in cells:
        if skip_following_cell:
            skip_following_cell = False
            continue

        css_classes = cell.get('class') or ()
        if 'table-head-item' in css_classes:
            skip_following_cell = True
            if not keep_abc_123_columns:
                continue
        if 'add-row' in css_classes:
            break

        item = _extract_special_table_cell(cell, processing_options)
        if item is None:
            cell_text = extract_from_nimbus_table_text_item(
                cell, processing_options)
            if not cell_text:
                # handle empty cells <td></td> or unrecognised cells e.g. table-attachments are currently
                # there but have no content
                # <td class="cell-attachment"><div class="table-attachment-wrap"><div>
                # <div class="table-attachment"><div class="attachment-item"></div></div></div></div></td>
                cell_text = [TextItem(processing_options, '')]
            item = TableItem(processing_options, cell_text)

        row_items.append(item)

    return row_items