def process_orphan_files(a_note, attachment_folder_name, orphans, processing_options, zip_file_path):
    """Add attachment links for orphaned files to a note and write the assets.

    A level 3 'Note Attachments' heading is appended to a copy of the note
    body, followed by one file attachment link per entry in ``orphans``.  The
    note body is then replaced by the extended copy and the new links are
    passed to ``extract_and_write_assets``.
    """
    body_items = get_copy_of_note_body_contents(a_note)

    heading_text = TextItem(processing_options, 'Note Attachments')
    body_items.append(HeadingItem(processing_options, [heading_text], level=3, id=''))

    orphan_links = []
    for orphan in orphans:
        orphan_name = str(orphan)
        link = FileAttachmentCleanHTML(
            processing_options,
            TextItem(processing_options, orphan_name),
            str(Path('assets', orphan)),
            orphan_name,
        )
        link.set_target_path(attachment_folder_name)

        # the same link object is shown in the body and processed as an asset
        body_items.append(link)
        orphan_links.append(link)

    update_note_body_contents(a_note, body_items)
    extract_and_write_assets(a_note, orphan_links, attachment_folder_name, zip_file_path)
def test_note_data_find_self(self):
    """Searching a TextItem for TextItem instances yields one TextItem."""
    # NOTE(review): processing_options is not a parameter of this test, so it
    # must resolve from a scope that is not visible here - confirm.
    text_item = TextItem(processing_options, 'Hello')

    found = text_item.find_items(class_=TextItem)

    assert len(found) == 1
    assert isinstance(found[0], TextItem)
def test_embed_nimbus_markdown_output(self, processing_options):
    """EmbedNimbus.markdown() renders the block quote and then the caption."""
    caption_text = TextItem(processing_options, 'caption')
    caption = Paragraph(processing_options, [caption_text])
    quoted_text = TextItem(processing_options, 'some html')
    quote = BlockQuote(processing_options, [quoted_text])

    embed = EmbedNimbus(processing_options, quote, caption)

    assert embed.markdown() == '> some html\n\ncaption\n\n'
def test_embed_nimbus_html_output(self, processing_options):
    """EmbedNimbus.html() renders the block quote and then the caption."""
    caption_text = TextItem(processing_options, 'caption')
    caption = Paragraph(processing_options, [caption_text])
    quoted_text = TextItem(processing_options, 'some html')
    quote = BlockQuote(processing_options, [quoted_text])

    embed = EmbedNimbus(processing_options, quote, caption)

    # NOTE(review): the expected markup contains '/p>' without a leading '<' -
    # confirm whether this mirrors EmbedNimbus.html() or is a typo.
    assert embed.html() == '<p><blockquote>some html</blockquote>/p><p>caption</p>'
def test_table_of_contents(self, processing_options):
    """table_of_contents renders the title and a nested list of anchor links."""
    title_contents = [TextItem(processing_options, part) for part in ('My ', 'Title')]

    first_entry = OutlineItem(processing_options, TextItem(processing_options, 'Item1'), 0, '1234')
    second_entry = OutlineItem(processing_options, TextItem(processing_options, 'Item2'), 1, '2345')
    outline = NumberedList(processing_options, [first_entry, second_entry])

    rendered = html_string_builders.table_of_contents(title_contents, outline)

    assert rendered == '<h2>My Title</h2><h4><ol><li><a href="1234">Item1</a></li><ol><li><a href="2345">Item2</a></li></ol></ol></h4>'
def test_checklist_item_item(checked, indent, expected, processing_options):
    """checklist_item builds the expected markdown from three text fragments."""
    fragments = ("This is ", "check", " one")
    contents = [TextItem(processing_options, fragment) for fragment in fragments]

    markdown_text = markdown_string_builders.checklist_item(contents, checked, indent)

    assert markdown_text == expected
def test_checklist_item_for_html_output(checked, indent, expected, processing_options):
    """ChecklistItem.html() renders the expected markup.

    ``checked``, ``indent`` and ``expected`` are supplied by the caller
    (presumably a parametrize decorator outside this excerpt - confirm).
    """
    contents = [
        TextItem(processing_options, "This is "),
        TextItem(processing_options, "check"),
        TextItem(processing_options, " one"),
    ]
    checklist_item = ChecklistItem(processing_options, contents, indent, checked)

    result = checklist_item.html()

    assert result == expected
def test_get_tags_from_contents_no_tags_in_content(self, processing_options, conversion_settings):
    """get_tags_from_contents returns an empty set for paragraphs without tags."""
    paragraph_texts = ('some text', 'some more text', 'even more text')
    contents = [
        Paragraph(processing_options, [TextItem(processing_options, text)])
        for text in paragraph_texts
    ]
    note = NimbusNote(processing_options, contents, conversion_settings, 'My Note')

    found_tags = note.get_tags_from_contents()

    assert found_tags == set()
def test_get_tags_from_contents(self, processing_options, conversion_settings):
    """get_tags_from_contents returns the tag paragraphs, keeping the '#' prefix."""
    paragraph_texts = ('#tag1', '#tag2', 'some text')
    contents = [
        Paragraph(processing_options, [TextItem(processing_options, text)])
        for text in paragraph_texts
    ]
    note = NimbusNote(processing_options, contents, conversion_settings, 'My Note')

    found_tags = note.get_tags_from_contents()

    assert found_tags == {'#tag1', '#tag2'}
def test_note_markdown(self, processing_options, conversion_settings):
    """NimbusNote.markdown() emits each paragraph on its own line."""
    paragraph_texts = ('#tag1', '#tag2', 'some text')
    contents = [
        Paragraph(processing_options, [TextItem(processing_options, text)])
        for text in paragraph_texts
    ]
    note = NimbusNote(processing_options, contents, conversion_settings, 'My Note')

    markdown_text = note.markdown()

    assert markdown_text == '#tag1\n#tag2\nsome text\n'
def test_note_html(self, processing_options, conversion_settings):
    """NimbusNote.html() wraps the paragraph markup in an html document."""
    paragraph_texts = ('#tag1', '#tag2', 'some text')
    contents = [
        Paragraph(processing_options, [TextItem(processing_options, text)])
        for text in paragraph_texts
    ]
    note = NimbusNote(processing_options, contents, conversion_settings, 'My Note')

    html_text = note.html()

    assert html_text == '<!doctype html><html lang="en"><p>#tag1</p><p>#tag2</p><p>some text</p></html>'
def test_pipe_table_row(processing_options):
    """TableRow.markdown() joins its items into one pipe delimited line."""
    cells = [TextItem(processing_options, text) for text in ('Row Item 1', 'Row Item 2')]
    row = TableRow(processing_options, cells)

    markdown_text = row.markdown()

    assert markdown_text == "|Row Item 1|Row Item 2|\n"
def test_html_output(self, processing_options):
    """TableRow.html() renders each TableItem as a table cell."""
    cells = [
        TableItem(processing_options, [TextItem(processing_options, text)])
        for text in ('Column 1', 'Column 2')
    ]
    table_row = TableRow(processing_options, cells)

    html_text = table_row.html()

    # NOTE(review): the expected markup opens <tr> twice - confirm that this
    # matches what TableRow.html() is meant to produce.
    assert html_text == "<tr><tr><td>Column 1</td><td>Column 2</td></tr>"
def test_pipe_table_header(processing_options):
    """TableHeader.markdown() emits the header line and its separator line."""
    columns = [TextItem(processing_options, text) for text in ('Column 1', 'Column 2')]
    header = TableHeader(processing_options, columns)

    markdown_text = header.markdown()

    assert markdown_text == "\n|Column 1|Column 2|\n|--|--|\n"
def test_checklist(processing_options):
    """Checklist.markdown() renders a checked and an unchecked item with tab indents."""
    checked_item = ChecklistItem(processing_options, [TextItem(processing_options, 'Check 1')], 1, True)
    unchecked_item = ChecklistItem(processing_options, [TextItem(processing_options, 'Check 2')], 2, False)
    check_list = Checklist(processing_options, [checked_item, unchecked_item])

    markdown_text = check_list.markdown()

    assert markdown_text == "\t- [x] Check 1\n\t\t- [ ] Check 2\n\n"
def extract_from_nimbus_table_date_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table date item.

    The date is read from the text of the first <span> in the cell that has
    the class "input-date-text".

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to read the date from
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Date text as a TextItem in a TableItem wrapper, or None when cell_tag
        is not a <td> tag or holds no date span

    """
    if cell_tag.name != 'td':
        return None

    date_span = cell_tag.find('span', class_="input-date-text")
    if date_span:
        date_text = TextItem(processing_options, date_span.text)
        return TableItem(processing_options, [date_text])

    return None
def extract_from_nimbus_table_rating_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table rating item.

    The rating is the number of <span> tags in the cell that have the class
    "rating-active"; it is reported as the text 'Rating N/5 stars'.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to read the rating from
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Rating text as a TextItem in a TableItem wrapper, or None when
        cell_tag is not a <td> tag or holds no active star spans

    """
    if cell_tag.name != 'td':
        return None

    stars = cell_tag.find_all('span', class_="rating-active")
    if not stars:
        return None

    star_count = len(stars)
    rating = TextItem(processing_options, f'Rating {star_count}/5 stars')
    return TableItem(processing_options, [rating])
def extract_from_nimbus_table_progress_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table progress item.

    The progress value is the text of the first <span> in the cell that has
    the class "progress-value"; it is reported as the text 'Progress <value>'.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to read the progress from
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Progress text as a TextItem in a TableItem wrapper, or None when
        cell_tag is not a <td> tag or holds no progress span

    """
    if cell_tag.name != 'td':
        return None

    value_span = cell_tag.find('span', class_="progress-value")
    if not value_span:
        return None

    progress = TextItem(processing_options, f'Progress {value_span.text}')
    return TableItem(processing_options, [progress])
def extract_from_nimbus_bookmark(tag, processing_options: NimbusProcessingOptions):
    """
    Extract the link, description and preview image from a Nimbus bookmark div.

    Parameters
    ==========
    tag : beautiful soup <div> tag object
        Candidate tag; only a <div> carrying the class 'nimbus-bookmark' is
        processed
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    Paragraph or None
        None when tag is not a nimbus bookmark div.  Otherwise a Paragraph
        holding, in this order and each only when found in the html, a
        Hyperlink, a description TextItem and the preview image item.

    """
    if tag.name != 'div' or not tag.get('class') or 'nimbus-bookmark' not in tag['class']:
        return None

    items = []

    a_tag = tag.find('a')
    if a_tag:
        href = a_tag['href']
        text_tag = tag.find('div', class_='nimbus-bookmark__info__name')
        # find() returns None when the name div is missing; fall back to the
        # href so the link is still emitted instead of raising AttributeError
        display_text = text_tag.text if text_tag is not None else href
        items.append(Hyperlink(processing_options, display_text, href))

    description_tag = tag.find('div', class_="nimbus-bookmark__info__desc")
    if description_tag:
        items.append(TextItem(processing_options, description_tag.text))

    image_div_tag = tag.find("div", class_="nimbus-bookmark__preview")
    if image_div_tag:
        image_tag = image_div_tag.find('img')
        if image_tag:
            image_data_object = html_data_extractors.extract_from_img_tag(
                image_tag, processing_options)
            image_data_object.width = "280"  # match max width size in nimbus css
            items.append(image_data_object)

    return Paragraph(processing_options, items)
def extract_from_nimbus_table_select_item(
        cell_tag, processing_options: NimbusProcessingOptions):
    """
    Extract data from a Nimbus table select item.

    The texts of all <span> tags in the cell that have the class
    "select-label-text" are joined with single spaces and the result is
    stripped of surrounding whitespace.

    Parameters
    ==========
    cell_tag : beautiful soup <td> tag object
        Table cell to read the selected labels from
    processing_options : NimbusProcessingOptions
        Processing options for nimbus html conversion

    Returns
    =======
    TableItem or None
        Select text as a TextItem in a TableItem wrapper, or None when
        cell_tag is not a <td> tag or holds no select label spans

    """
    if cell_tag.name != 'td':
        return None

    label_spans = cell_tag.find_all('span', class_="select-label-text")
    if not label_spans:
        return None

    joined_labels = ' '.join(label.text for label in label_spans).strip()
    return TableItem(processing_options, [TextItem(processing_options, joined_labels)])
def test_html_with_b_as_format(self, processing_options):
    """A TextFormatItem created with format 'b' renders html as <strong>."""
    inner_text = TextItem(processing_options, 'some text')
    formatted = TextFormatItem(processing_options, [inner_text], 'b')

    html_text = formatted.html()

    assert html_text == '<strong>some text</strong>'
def test_markdown_generation(self, heading, link_id, link_format, expected, processing_options):
    """OutlineItem.markdown() output matches the expected text for the export format."""
    # the export format is set on the shared options before the item is built
    processing_options.export_format = link_format

    heading_text = TextItem(processing_options, heading)
    outline_item = OutlineItem(processing_options, heading_text, 2, link_id)

    assert outline_item.markdown() == expected
def test_heading(heading, heading_id, id_format, expected, processing_options):
    """markdown_string_builders.heading renders a level 1 heading as expected."""
    heading_items = [TextItem(processing_options, heading)]

    markdown_text = markdown_string_builders.heading(heading_items, 1, heading_id, id_format)

    assert markdown_text == expected
def extract_embed_block_quote_if_present():
    """
    Inner function to extract a block quote and caption from an embed div.

    A twitter block quote repeats itself twice in nimbus html, so only the
    <blockquote> tag is passed to the blockquote extractor.  This also results
    in the contents being indented as a blockquote like a twitter tweet is.

    Returns an EmbedNimbus, or None when the div holds no blockquote or the
    blockquote extractor returns nothing.
    """
    quote_tag = div_tag.find('blockquote')
    if not quote_tag:
        return None

    # NOTE the exported remote frame and iframe work using process_child items,
    # only the blockquote is extracted here for the reason given above
    quote_data = html_data_extractors.extract_from_blockquote(
        quote_tag, processing_options)

    # because just the blockquote was extracted the caption still has to be fetched
    try:
        caption_tag = div_tag.find('div', class_='attachment-caption')
        caption = extract_from_nimbus_attachment_caption(caption_tag, processing_options)
    except AttributeError:
        # no caption data is fine - asking for forgiveness, use an empty caption
        caption = Caption(processing_options, [TextItem(processing_options, '')])

    if quote_data:
        return EmbedNimbus(processing_options, quote_data, caption)

    return None
def test_table_item_markdown(self, processing_options):
    """TableItem.markdown() returns the text of its contents."""
    cell_text = TextItem(processing_options, 'Column 1')
    table_item = TableItem(processing_options, [cell_text])

    markdown_text = table_item.markdown()

    assert markdown_text == 'Column 1'
def test_find_tags_stop_when_not_a_paragraph_or_title_item(self, processing_options, conversion_settings):
    """find_tags collects the tags of the paragraphs that precede a plain TextItem."""
    head = Head(processing_options, [TextItem(processing_options, 'title')])

    first_tag_paragraph = Paragraph(processing_options, [TextItem(processing_options, '#tag1/tag3')])
    second_tag_paragraph = Paragraph(processing_options, [TextItem(processing_options, '#tag2')])
    plain_text = TextItem(processing_options, 'my title')
    body = Body(processing_options, [first_tag_paragraph, second_tag_paragraph, plain_text])

    note = NimbusNote(processing_options, [head, body], conversion_settings, 'My Note')
    note.find_tags()

    assert set(note.tags) == {'tag1', 'tag2', 'tag3'}
def test_anchor_link(processing_options):
    """anchor_link wraps the contents in an <a> tag that points at the link id."""
    link_text = TextItem(processing_options, 'Item1')

    html_text = html_string_builders.anchor_link(link_text, '1234')

    assert html_text == '<a href="1234">Item1</a>'
def test_post_init_no_target_filename_provided(self, processing_options):
    """Without a target filename the attachment takes the file name from the href."""
    link_text = TextItem(processing_options, 'my_contents')

    attachment = FileAttachmentCleanHTML(processing_options, link_text, 'href_folder/file.pdf')

    assert attachment.source_path == Path('href_folder/file.pdf')
    assert attachment.target_filename == 'file.pdf'
def test_markdown_anchor_tag_link(heading, link_id, link_format, expected, processing_options):
    """markdown_anchor_tag_link renders the expected link for the given format."""
    link_text = TextItem(processing_options, heading)

    markdown_text = markdown_string_builders.markdown_anchor_tag_link(
        link_text, link_id, link_format)

    assert markdown_text == expected
def test_numbered_list_item(processing_options):
    """numbered_list_item escapes the dot after a leading number in the text."""
    item_contents = [TextItem(processing_options, "1989. was a good year")]

    markdown_text = markdown_string_builders.numbered_list_item(item_contents)

    assert markdown_text == "1989\\. was a good year"