def test_is_html_based_content(conversion_input, expected):
    """Check ``HTMLToMDConverter.is_html_based_content`` for one input type.

    ``conversion_input`` is assigned to the conversion settings and the
    converter's answer is compared against ``expected``.  Both values are
    supplied by the caller (presumably a parametrization not visible here).
    """
    settings = ConversionSettings()
    settings.conversion_input = conversion_input
    converter = HTMLToMDConverter(settings, 'files_to_convert')

    # Compared by identity, so the result must be the same object as
    # ``expected`` rather than merely an equal value.
    assert converter.is_html_based_content() is expected
def test_add_metadata_md_to_content(content, metadata, front_matter_format,
                                    expected):
    """Check that metadata is added to markdown content in the chosen format.

    The processor is configured with ``front_matter_format``, given
    ``metadata`` directly, and asked to add it to ``content``; the returned
    text must equal ``expected``.
    """
    settings = ConversionSettings()
    settings.front_matter_format = front_matter_format
    processor = MetaDataProcessor(settings)
    # Inject the metadata directly instead of parsing it from a document.
    processor._metadata = metadata

    assert processor.add_metadata_md_to_content(content) == expected
def test_convert_tag_sting_to_tag_list(html_string, expected, schema):
    """Check the metadata left after parsing HTML and converting the tag string.

    ``schema`` is installed as the metadata schema, ``html_string`` is parsed
    for metadata, ``convert_tag_sting_to_tag_list`` is run, and the resulting
    metadata must equal ``expected``.
    """
    settings = ConversionSettings()
    settings.metadata_schema = schema
    processor = MetaDataProcessor(settings)

    # Parse first, then convert: the conversion acts on the parsed metadata.
    processor.parse_html_metadata(html_string)
    processor.convert_tag_sting_to_tag_list()

    assert processor.metadata == expected
 def setUp(self):
     """Build a 'gfm' converter with a fixed file list and a fresh metadata processor."""
     settings = ConversionSettings()
     self.conversion_settings = settings
     settings.set_quick_setting('gfm')

     # Paths the converter is told about; nothing here creates them on disk.
     file_names = (
         'not_existing.md',
         'some_markdown-old-1.md',
         'renaming source file failed',
         'test_html_file.md',
         '/a_folder/test_html_file.html',
     )
     files_to_convert = [Path(file_name) for file_name in file_names]

     converter = HTMLToMDConverter(self.conversion_settings, files_to_convert)
     converter._metadata_processor = MetaDataProcessor(self.conversion_settings)
     self.file_converter = converter
def test_add_metadata_html_to_content():
    """Check that metadata is written into the head of an HTML document.

    Compared with the input, the expected output carries the metadata title
    in ``<title>`` and one ``<meta>`` element per metadata key just before
    ``</head>``, with the tag list joined by commas.
    """
    html_before = """<!DOCTYPE html>

    <html lang="" xml:lang="" xmlns="http://www.w3.org/1999/xhtml">
    <head>
    <meta charset="utf-8"/>
    <meta content="pandoc" name="generator"/>
    <meta content="width=device-width, initial-scale=1.0, user-scalable=yes" name="viewport"/>
    <title>-</title>
    <style>
        html {
          line-height: 1.5;
          font-family: Georgia, serif;
          font-size: 20px;
          color: #1a1a1a;
          background-color: #fdfdfd;
        }
    </style>
    </head></html>
    """

    html_after = """<!DOCTYPE html>

<html lang="" xml:lang="" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta content="pandoc" name="generator"/>
<meta content="width=device-width, initial-scale=1.0, user-scalable=yes" name="viewport"/>
<title>My Title</title>
<style>
        html {
          line-height: 1.5;
          font-family: Georgia, serif;
          font-size: 20px;
          color: #1a1a1a;
          background-color: #fdfdfd;
        }
    </style>
<meta title="My Title"/><meta ctime="1234"/><meta mtime="5678"/><meta tags="Tag1,Tag1/SubTag1,Tag1/SubTag1/SubSubTag1,Tag2"/></head></html>
"""

    settings = ConversionSettings()
    settings.front_matter_format = 'yaml'
    processor = MetaDataProcessor(settings)
    # Metadata is injected directly; nothing is parsed from the input document.
    processor._metadata = {
        'title': 'My Title',
        'ctime': '1234',
        'mtime': '5678',
        'tags': ["Tag1", "Tag1/SubTag1", "Tag1/SubTag1/SubSubTag1", "Tag2"]
    }

    assert processor.add_metadata_html_to_content(html_before) == html_after
def test_add_text_metadata_to_content(md_string, markdown_conversion_input,
                                      expected):
    """Check the text metadata added to a fixed body after parsing markdown.

    Metadata is parsed from ``md_string`` using a schema of title, ctime,
    mtime and tag, then added to the literal content ``"hello"``; the result
    must equal ``expected``.
    """
    settings = ConversionSettings()
    settings.markdown_conversion_input = markdown_conversion_input
    settings.metadata_schema = ['title', 'ctime', 'mtime', 'tag']

    processor = MetaDataProcessor(settings)
    processor.parse_md_metadata(md_string)

    body = "hello"
    assert processor.add_text_metadata_to_content(body) == expected
def test_add_tag_prefix_if_required(md_string, expected):
    """Check the metadata after tags are prefixed with ``'#'``.

    Metadata is parsed from ``md_string``, ``add_tag_prefix_if_required`` is
    run with a tag prefix of ``'#'``, and the metadata must equal ``expected``.

    Sample values noted alongside this test:
        md_string = "---\\nctime: '202102122352'\\nmtime: '202104242208'\\n"
                    "tag:\\n- Tag1\\n- Tag1/SubTag1\\n- Tag1/SubTag1/SubSubTag1\\n"
                    "- Tag2\\ntitle: test page\\n---\\n\\n# This is H1"
        expected = {'ctime': '202102122352', 'mtime': '202104242208',
                    'tag': ['#Tag1', '#Tag1/SubTag1',
                            '#Tag1/SubTag1/SubSubTag1', '#Tag2'],
                    'title': 'test page'}
    """
    settings = ConversionSettings()
    settings.tag_prefix = '#'
    settings.metadata_schema = ['']
    processor = MetaDataProcessor(settings)

    processor.parse_md_metadata(md_string)
    processor.add_tag_prefix_if_required()

    assert processor.metadata == expected
def test_split_tags_if_required_with_tags_key(html_string, expected):
    """Check the sorted tag list produced from HTML metadata with tag splitting on.

    Metadata is parsed from ``html_string`` and the tag string is converted
    to a list.  Whichever of the ``'tags'`` / ``'tag'`` keys is present must,
    once sorted, equal ``expected``.  If neither key is present nothing is
    asserted.
    """
    settings = ConversionSettings()
    settings.split_tags = True
    settings.metadata_schema = ['']
    processor = MetaDataProcessor(settings)

    processor.parse_html_metadata(html_string)
    processor.convert_tag_sting_to_tag_list()

    # The metadata may use either key name, so check each one that exists.
    for tag_key in ('tags', 'tag'):
        if tag_key in processor.metadata:
            assert sorted(processor.metadata[tag_key]) == expected
# Example #9
# 0
    def test_set_out_put_extension(self):
        """Check the file suffix chosen for the current and for the 'html' export format."""
        converter = self.file_converter

        # Settings as prepared before this test runs: '.md' is expected.
        md_suffix = file_mover.get_file_suffix_for(
            converter._conversion_settings.export_format)
        self.assertEqual('.md', md_suffix,
                         'failed to select correct md extension')

        # Swap in fresh settings using the 'html' quick setting: '.html' is expected.
        html_settings = ConversionSettings()
        converter._conversion_settings = html_settings
        html_settings.set_quick_setting('html')
        html_suffix = file_mover.get_file_suffix_for(
            converter._conversion_settings.export_format)
        self.assertEqual('.html', html_suffix,
                         'failed to select correct html extension')
# Example #10
# 0
    def test_convert(self):
        """Convert a note holding one sized HTML image using the 'obsidian' quick setting.

        The post-processed content is expected to be the image written as
        ``![|600](filepath/image.png)`` followed by a newline.
        """
        converter = self.file_converter
        settings = ConversionSettings()
        converter._conversion_settings = settings
        settings.set_quick_setting('obsidian')

        with TempDirectory() as d:
            root = Path(d.path)
            export_dir = Path(d.path, 'export')

            converter._conversion_settings.source = root
            converter._conversion_settings.export_folder = export_dir
            export_dir.mkdir()

            note = Path(d.path, 'some_markdown.md')
            note.write_text('<img src="filepath/image.png" width="600">')

            converter.convert_note(note)

            self.assertEqual('![|600](filepath/image.png)\n',
                             converter._post_processed_content,
                             'failed to convert file')
def test_get_attachment_paths_where_make_absolute_for_non_copyable_files(tmp_path):
    """Classify every link in a note's content with ``get_attachment_paths``.

    A directory tree is created under ``tmp_path`` with ``some_folder/data``
    as the conversion source.  The note content mixes markdown and HTML links
    labelled copyable, non-copyable and non-existing.  The test then checks
    the size and members of each category on the returned object.
    ``make_absolute`` is not set here, so the settings default applies.
    """
    folders = (
        'some_folder/data/my_notebook/attachments',
        'some_folder/attachments',
        'some_folder/data/attachments',
        'some_folder/data/my_other_notebook/attachments',
    )
    for folder in folders:
        Path(tmp_path, folder).mkdir(parents=True)

    files_on_disk = (
        'some_folder/data/my_notebook/note.md',
        'some_folder/data/my_notebook/attachments/one.png',
        'some_folder/data/attachments/two.csv',
        'some_folder/three.png',
        'some_folder/attachments/four.csv',
        'some_folder/four.csv',
        'some_folder/data/my_other_notebook/attachments/five.pdf',
        'some_folder/data/my_notebook/six.csv',
        'some_folder/data/my_notebook/attachments/eight.pdf',
        'some_folder/data/my_notebook/nine.md',
        'some_folder/data/my_notebook/attachments/ten.png',
        'some_folder/data/my_notebook/attachments/eleven.pdf',
        'some_folder/data/my_notebook/attachments/file twelve.pdf',
        'some_folder/data/my_notebook/attachments/file fourteen.png',
    )
    for file_on_disk in files_on_disk:
        Path(tmp_path, file_on_disk).touch()

    note_path = Path(tmp_path, 'some_folder/data/my_notebook/note.md')
    # The alt text of each link states the category it is expected to land in.
    content = (
        f'![copyable|600]({tmp_path}/some_folder/data/my_notebook/attachments/one.png)\n'
        f'![non-existing|600]({tmp_path}/some_folder/two.png)\n'
        f'![non-copyable|600]({tmp_path}/some_folder/three.png)\n'
        '![non-copyable|600](../../three.png)\n'
        '![non-existing|600](attachments/three.pdf)\n'
        '![copyable|600](attachments/eight.pdf)\n'
        '![copyable](../attachments/two.csv)\n'
        '![non-copyable](../../attachments/four.csv)\n'
        '![non-existing](../my_notebook/seven.csv)\n'
        '![copyable](../my_notebook/six.csv)\n'
        '![copyable](../my_other_notebook/attachments/five.pdf "test tool tip text")\n'
        '![note link](nine.md)\n'
        '[a web link](https://www.google.com "google")\n'
        '<img src="attachments/ten.png" />\n'
        '<a href="attachments/eleven.pdf">example-attachment.pdf</a>\n'
        '![copyable](attachments/file%20twelve.pdf)\n'
        '<a href="attachments/file%20thirteen.pdf">example-attachment.pdf</a>\n'
        '<img src="attachments/file%20fourteen.png" />\n'
        '<a href="https://www.google.com "google">)'
    )

    settings = ConversionSettings()
    settings.source = Path(tmp_path, 'some_folder/data')
    settings.export_folder = Path(tmp_path, 'some_folder/export')
    settings.export_format = 'obsidian'

    converter = HTMLToMDConverter(settings, 'files_to_convert')
    converter._file = note_path
    converter._files_to_convert = {Path(tmp_path, 'some_folder/data/my_notebook/nine.md')}

    active_settings = converter._conversion_settings
    attachment_links = get_attachment_paths(
        active_settings.source_absolute_root,
        active_settings.conversion_input,
        converter._file,
        converter._files_to_convert,
        content,
    )

    # Category sizes.
    assert len(attachment_links.all) == 16
    assert len(attachment_links.copyable) == 9
    assert len(attachment_links.copyable_absolute) == 9
    assert len(attachment_links.existing) == 12
    assert len(attachment_links.non_copyable_absolute) == 1
    assert len(attachment_links.non_copyable_relative) == 2
    assert len(attachment_links.non_existing) == 4

    # Copyable links are reported as absolute paths.
    expected_copyable = (
        'some_folder/data/my_other_notebook/attachments/five.pdf',
        'some_folder/data/my_notebook/attachments/one.png',
        'some_folder/data/my_notebook/six.csv',
        'some_folder/data/attachments/two.csv',
        'some_folder/data/my_notebook/attachments/eight.pdf',
        'some_folder/data/my_notebook/attachments/ten.png',
        'some_folder/data/my_notebook/attachments/eleven.pdf',
        'some_folder/data/my_notebook/attachments/file twelve.pdf',
        'some_folder/data/my_notebook/attachments/file fourteen.png',
    )
    for copyable in expected_copyable:
        assert Path(tmp_path, copyable) in attachment_links.copyable_absolute

    assert len(attachment_links.copyable_absolute) == 9

    # Non-existing links are reported as the link text, with %20 decoded.
    expected_missing = (
        f'{tmp_path}/some_folder/two.png',
        'attachments/three.pdf',
        '../my_notebook/seven.csv',
        'attachments/file thirteen.pdf',
    )
    for missing in expected_missing:
        assert missing in attachment_links.non_existing
    assert len(attachment_links.non_existing) == 4

    assert '../../attachments/four.csv' in attachment_links.non_copyable_relative
    assert '../../three.png' in attachment_links.non_copyable_relative
    assert len(attachment_links.non_copyable_relative) == 2
    # three.png is used twice, once with a relative path and once with an absolute path.  As we are
    # searching post processed content, the content will already have had make absolute applied if
    # it was turned on, so we still get one path to three.png in the relative list.

    assert f'{tmp_path}/some_folder/three.png' in attachment_links.non_copyable_absolute
    assert len(attachment_links.non_copyable_absolute) == 1
def test_get_attachment_paths_markdown_export_format(tmp_path):
    """Classify every link in a note's content with ``make_absolute`` set to False.

    A directory tree is created under ``tmp_path`` with ``some_folder/data``
    as the conversion source.  The note content mixes markdown and HTML links
    labelled copyable, non-copyable and non-existing.  The test then checks
    the size and members of each category returned by ``get_attachment_paths``.
    """
    folders = (
        'some_folder/data/my_notebook/attachments',
        'some_folder/attachments',
        'some_folder/data/attachments',
        'some_folder/data/my_other_notebook/attachments',
    )
    for folder in folders:
        Path(tmp_path, folder).mkdir(parents=True)

    files_on_disk = (
        'some_folder/data/my_notebook/note.md',
        'some_folder/data/my_notebook/attachments/one.png',
        'some_folder/data/attachments/two.csv',
        'some_folder/three.png',
        'some_folder/attachments/four.csv',
        'some_folder/four.csv',
        'some_folder/data/my_other_notebook/attachments/five.pdf',
        'some_folder/data/my_notebook/six.csv',
        'some_folder/data/my_notebook/attachments/eight.pdf',
        'some_folder/data/my_notebook/nine.md',
        'some_folder/data/my_notebook/attachments/ten.png',
        'some_folder/data/my_notebook/attachments/eleven.pdf',
        'some_folder/data/my_notebook/attachments/file twelve.pdf',
        'some_folder/data/my_notebook/attachments/file fourteen.png',
    )
    for file_on_disk in files_on_disk:
        Path(tmp_path, file_on_disk).touch()

    note_path = Path(tmp_path, 'some_folder/data/my_notebook/note.md')
    # The alt text of each link states the category it is expected to land in.
    content = (
        f'![copyable|600]({tmp_path}/some_folder/data/my_notebook/attachments/one.png)\n'
        f'![non-existing|600]({tmp_path}/some_folder/two.png)\n'
        f'![non-copyable|600]({tmp_path}/some_folder/three.png)\n'
        '![non-existing|600](attachments/three.pdf)\n'
        '![copyable|600](attachments/eight.pdf)\n'
        '![copyable](../attachments/two.csv)\n'
        '![non-copyable](../../attachments/four.csv)\n'
        '![non-existing](../my_notebook/seven.csv)\n'
        '![copyable](../my_notebook/six.csv)\n'
        '![copyable](../my_other_notebook/attachments/five.pdf "test tool tip text")\n'
        '![note link](nine.md)\n'
        '[a web link](https://www.google.com "google")\n'
        '<img src="attachments/ten.png" />\n'
        '<a href="attachments/eleven.pdf">example-attachment.pdf</a>\n'
        '![copyable](attachments/file%20twelve.pdf)\n'
        '<a href="attachments/file%20thirteen.pdf">example-attachment.pdf</a>\n'
        '<img src="attachments/file%20fourteen.png" />'
    )

    settings = ConversionSettings()
    settings.source = Path(tmp_path, 'some_folder/data')
    settings.export_folder = Path(tmp_path, 'some_folder/export')
    settings.export_format = 'obsidian'
    settings.make_absolute = False

    converter = HTMLToMDConverter(settings, 'files_to_convert')
    converter._file = note_path
    converter._files_to_convert = {Path(tmp_path, 'some_folder/data/my_notebook/nine.md')}

    active_settings = converter._conversion_settings
    attachment_links = get_attachment_paths(
        active_settings.source_absolute_root,
        active_settings.conversion_input,
        converter._file,
        converter._files_to_convert,
        content,
    )

    # Category sizes.
    assert len(attachment_links.all) == 15
    assert len(attachment_links.copyable) == 9
    assert len(attachment_links.copyable_absolute) == 9

    assert len(attachment_links.existing) == 11
    assert len(attachment_links.non_copyable_relative) == 1
    assert len(attachment_links.non_copyable_absolute) == 1

    assert len(attachment_links.non_existing) == 4

    # Copyable links are reported as absolute paths.
    expected_copyable = (
        'some_folder/data/my_other_notebook/attachments/five.pdf',
        'some_folder/data/my_notebook/attachments/one.png',
        'some_folder/data/my_notebook/six.csv',
        'some_folder/data/attachments/two.csv',
        'some_folder/data/my_notebook/attachments/eight.pdf',
        'some_folder/data/my_notebook/attachments/ten.png',
        'some_folder/data/my_notebook/attachments/eleven.pdf',
        'some_folder/data/my_notebook/attachments/file twelve.pdf',
        'some_folder/data/my_notebook/attachments/file fourteen.png',
    )
    for copyable in expected_copyable:
        assert Path(tmp_path, copyable) in attachment_links.copyable_absolute
    assert len(attachment_links.copyable_absolute) == 9

    # Non-existing links are reported as the link text, with %20 decoded.
    expected_missing = (
        f'{tmp_path}/some_folder/two.png',
        'attachments/three.pdf',
        '../my_notebook/seven.csv',
        'attachments/file thirteen.pdf',
    )
    for missing in expected_missing:
        assert missing in attachment_links.non_existing
    assert len(attachment_links.non_existing) == 4

    # NOTE: for the "some_folder/attachments/four.csv" attachment the content should be updated
    # to a new relative link.
    assert '../../attachments/four.csv' in attachment_links.non_copyable_relative
    assert len(attachment_links.non_copyable_relative) == 1
    assert f'{tmp_path}/some_folder/three.png' in attachment_links.non_copyable_absolute
    assert len(attachment_links.non_copyable_absolute) == 1
class TestHTMLToMDConverter(unittest.TestCase):
    """Tests for the pre-process, convert, post-process and metadata steps of ``HTMLToMDConverter``."""

    def setUp(self):
        """Build a 'gfm' converter with a fixed file list and a fresh metadata processor."""
        self.conversion_settings = ConversionSettings()
        self.conversion_settings.set_quick_setting('gfm')
        files_to_convert = [Path('not_existing.md'),
                            Path('some_markdown-old-1.md'),
                            Path('renaming source file failed'),
                            Path('test_html_file.md'),
                            Path('/a_folder/test_html_file.html'),
                            ]
        self.file_converter = HTMLToMDConverter(self.conversion_settings, files_to_convert)
        self.file_converter._metadata_processor = MetaDataProcessor(self.conversion_settings)

    def test_pre_process_content(self):
        """Pre-processing inserts checklist placeholders, rewrites a link extension and parses metadata."""
        self.file_converter._file_content = '<head><meta title="this is test2"/><meta not_valid="not_in_schema"/></head><p><input checked="" type="checkbox"/>Check 1</p><p><input type="checkbox"/>Check 2</p><p><a href="/a_folder/test_html_file.html">html file</a></p>'
        self.file_converter._metadata_schema = ['title']
        self.file_converter._file = Path('a-file.html')
        self.file_converter._conversion_settings.export_format = 'obsidian'
        with TempDirectory() as d:
            self.file_converter._conversion_settings.source = Path(d.path)
            self.file_converter._conversion_settings.export_folder = Path(d.path)

            self.file_converter.pre_process_content()
            # assertIn / assertEqual report the compared values on failure.
            self.assertIn('checklist-placeholder-id',
                          self.file_converter._pre_processed_content,
                          'Failed to insert checklist placeholders',
                          )
            # The linked .html file is in the files-to-convert list from setUp,
            # and the link is expected to end in .md afterwards.
            self.assertIn('<p><a href="/a_folder/test_html_file.md">html file</a></p>',
                          self.file_converter._pre_processed_content,
                          'Failed to change link extension placeholders',
                          )
            self.assertEqual({'title': 'this is test2'},
                             self.file_converter._metadata_processor.metadata,
                             'Failed to parse meta data',
                             )

    def test_pre_process_content2_rename_existing_file_and_its_link_in_content(self):
        """Pre-processing renames an already existing file and updates its link in the content."""
        self.file_converter._file_content = '<head><meta title="this is test2"/><meta not_valid="not_in_schema"/></head><p><input checked="" type="checkbox"/>Check 1</p><p><input type="checkbox"/>Check 2</p><p><a href="a-file.md">existing_md file</a></p>'
        self.file_converter._metadata_schema = ['title']
        self.file_converter._file = Path('a-file.md')
        self.file_converter._conversion_settings.export_format = 'gfm'
        with TempDirectory() as d:
            self.file_converter._conversion_settings.source = Path(d.path)
            self.file_converter._conversion_settings.export_folder = Path(d.path)
            # Create the file that the converted output would otherwise collide with.
            Path(d.path, 'a-file.md').touch()

            self.file_converter.pre_process_content()
            # Use a unittest assertion: a bare assert is removed under ``python -O``.
            self.assertTrue(Path(d.path, 'a-file-old-1.md').exists(),
                            'Failed to rename existing file',
                            )

            self.assertIn('a-file-old-1.md',
                          self.file_converter._pre_processed_content,
                          'Failed to rename existing file link placeholders',
                          )

    def test_post_process_content2(self):
        """Full html -> pandoc_markdown pipeline with checklists, an image and an iframe."""
        self.file_converter._file_content = '<head><meta title="this is test2"/><meta not_valid="not_in_schema"/></head><p><input checked="" type="checkbox"/>Check 1</p><p><input type="checkbox"/>Check 2</p><img src="filepath/image.png" width="600"><p><iframe allowfullscreen="" anchorhref="https://www.youtube.com/watch?v=SqdxNUMO2cg" frameborder="0" height="315" src="https://www.youtube.com/embed/SqdxNUMO2cg" width="420" youtube="true"> </iframe></p>'
        self.file_converter._metadata_schema = ['title']
        self.file_converter._file = Path('a-file.html')
        self.file_converter._conversion_settings.export_format = 'pandoc_markdown'
        self.file_converter._conversion_settings.conversion_input = 'html'
        with TempDirectory() as d:
            self.file_converter._conversion_settings.source = Path(d.path)
            self.file_converter._conversion_settings.export_folder = Path(d.path)
            self.file_converter.pre_process_content()
            self.file_converter.convert_content()

            # set toml and confirm content is forced back into yaml
            self.file_converter._metadata_processor._conversion_settings.front_matter_format = 'toml'

            self.file_converter.post_process_content()

            self.assertEqual(
                '---\ntitle: this is test2\n---\n\n- [x] Check 1\n\n- [ ] Check 2\n\n<img src="filepath/image.png" width="600" />\n\n\n<iframe allowfullscreen="" anchorhref="https://www.youtube.com/watch?v=SqdxNUMO2cg" frameborder="0" height="315" src="https://www.youtube.com/embed/SqdxNUMO2cg" width="420" youtube="true"> </iframe>\n\n',
                self.file_converter._post_processed_content,
                'post processing failed'
                )

    def test_post_process_content3(self):
        """Full nsx -> obsidian pipeline; the sized image becomes ``![|600](...)``."""
        self.file_converter._file_content = '<head><meta title="this is test2"/><meta not_valid="not_in_schema"/></head><p><input checked="" type="checkbox"/>Check 1</p><p><input type="checkbox"/>Check 2</p><img src="filepath/image.png" width="600">'
        self.file_converter._metadata_schema = ['title']
        self.file_converter._file = Path('a-file.html')
        self.file_converter._conversion_settings.conversion_input = 'nsx'
        self.file_converter._conversion_settings.export_format = 'obsidian'
        self.file_converter.pre_process_content()
        self.file_converter.convert_content()
        self.file_converter.post_process_content()
        # Use assertEqual rather than a bare assert: it survives ``python -O``
        # and shows a diff of the two strings on failure.
        self.assertEqual(
            '---\ntitle: this is test2\n---\n\n- [x] Check 1\n\n- [ ] Check 2\n\n![|600](filepath/image.png)\n',
            self.file_converter._post_processed_content,
            'post processing failed'
            )

    def test_parse_metadata_if_required(self):
        """Exercise ``parse_metadata_if_required`` over four content/schema/export-format combinations."""
        # Case 1: obsidian export, metadata inside a <head> section.
        self.file_converter._conversion_settings.export_format = 'obsidian'
        self.file_converter._metadata_processor._metadata = {}
        self.file_converter._metadata_processor._metadata_schema = ['title', 'creation_time']
        self.file_converter._pre_processed_content = '<head><meta title="this is test2"/><meta creation_time="test-meta-content"/></head>'
        self.file_converter.parse_metadata_if_required()
        self.assertEqual({'title': 'this is test2'},
                         self.file_converter._metadata_processor.metadata,
                         'meta data not parsed correctly'
                         )

        # Case 2: meta tags without a <head> section must be ignored.
        self.file_converter._metadata_processor._metadata = {}
        self.file_converter._metadata_processor._metadata_schema = ['title']
        self.file_converter._pre_processed_content = '<meta title="this is test2"/><meta creation_time="test-meta-content"/>'
        self.file_converter.parse_metadata_if_required()
        self.assertEqual({},
                         self.file_converter._metadata_processor.metadata,
                         'meta data not ignored if no head section'
                         )

        # Case 3: a meta key that is not in the schema is left out of the result.
        self.file_converter._metadata_processor._metadata = {}
        self.file_converter._metadata_processor._metadata_schema = ['title']
        self.file_converter._pre_processed_content = '<head><meta title="this is test2"/><meta not_valid="not_in_schema"/></head>'
        self.file_converter.parse_metadata_if_required()
        self.assertEqual({'title': 'this is test2'},
                         self.file_converter._metadata_processor.metadata, 'meta data not parsed correctly')

        # Case 4: same content as case 1, but with the pandoc_markdown export format.
        self.file_converter._conversion_settings.export_format = 'pandoc_markdown'
        self.file_converter._metadata_processor._metadata = {}
        self.file_converter._metadata_processor._metadata_schema = ['title', 'creation_time']
        self.file_converter._pre_processed_content = '<head><meta title="this is test2"/><meta creation_time="test-meta-content"/></head>'
        self.file_converter.parse_metadata_if_required()
        self.assertEqual({'title': 'this is test2'},
                         self.file_converter._metadata_processor.metadata,
                         'meta data not parsed correctly'
                         )
# Example #14
# 0
def test_generate_set_of_attachment_paths_html_export_format(tmp_path):
    """Check how get_attachment_paths classifies the links found in a note.

    A small notebook tree is created under ``tmp_path``. The note content mixes
    markdown and html links: five point at attachment files that exist on disk,
    one points at another note that is in the set of files to convert, one is a
    web link, and one ('file thirteen.pdf') points at a file that is never
    created.
    """
    data_dir = Path(tmp_path, 'some_folder/data')
    notebook_dir = Path(data_dir, 'my_notebook')
    notebook_attachments = Path(notebook_dir, 'attachments')
    other_attachments = Path(data_dir, 'my_other_notebook/attachments')

    for folder in (
            notebook_attachments,
            Path(tmp_path, 'some_folder/attachments'),
            Path(data_dir, 'attachments'),
            other_attachments,
    ):
        folder.mkdir(parents=True)

    # 'file thirteen.pdf' is deliberately not created - it is the broken link.
    existing_attachments = [
        Path(other_attachments, 'five.pdf'),
        Path(notebook_attachments, 'ten.png'),
        Path(notebook_attachments, 'eleven.pdf'),
        Path(notebook_attachments, 'file twelve.pdf'),
        Path(notebook_attachments, 'file fourteen.png'),
    ]
    linked_note = Path(notebook_dir, 'nine.md')
    for file in (*existing_attachments, linked_note):
        file.touch()

    file_path = Path(notebook_dir, 'note.md')
    content = '\n'.join([
        '![copyable](../my_other_notebook/attachments/five.pdf "test tool tip text")',
        '![note link](nine.md)',
        '[a web link](https://www.google.com "google")',
        '<img src="attachments/ten.png" />',
        '<a href="attachments/eleven.pdf">example-attachment.pdf</a>',
        '![copyable](attachments/file%20twelve.pdf)',
        '<a href="attachments/file%20thirteen.pdf">example-attachment.pdf</a>',
        '<img src="attachments/file%20fourteen.png" />',
    ])

    conversion_settings = ConversionSettings()
    conversion_settings.source = data_dir
    conversion_settings.export_folder = Path(tmp_path, 'some_folder/export')
    conversion_settings.export_format = 'html'
    file_converter = MDToMDConverter(conversion_settings, 'files_to_convert')
    file_converter._file = file_path
    file_converter._files_to_convert = {linked_note}

    attachment_links = get_attachment_paths(
        file_converter._conversion_settings.source_absolute_root,
        file_converter._conversion_settings.conversion_input,
        file_converter._file,
        file_converter._files_to_convert,
        content,
    )

    # Every attachment that exists on disk must be reported, and nothing else.
    for attachment in existing_attachments:
        assert attachment in attachment_links.copyable_absolute
    assert len(attachment_links.copyable_absolute) == 5

    # The percent-encoded link to the missing file is reported with the space decoded.
    assert 'attachments/file thirteen.pdf' in attachment_links.non_existing
    assert len(attachment_links.non_existing) == 1
class TestMetaDataProcessor(unittest.TestCase):
    def setUp(self) -> None:
        """Create a MetaDataProcessor from settings using the 'gfm' quick setting."""
        settings = ConversionSettings()
        settings.set_quick_setting('gfm')
        self.conversion_settings = settings
        self.metadata_processor = MetaDataProcessor(settings)

    def test_remove_tag_spaces_if_required(self):
        """Check that spaces in tags become dashes only when spaces are not allowed.

        Each data set is (metadata, value for _spaces_in_tags, expected metadata,
        failure message).
        """
        test_data_sets = [
            (
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/SubTag1", "Tag1/SubTag1/SubSubTag1", "Tag2"],
                },
                False,
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/SubTag1", "Tag1/SubTag1/SubSubTag1", "Tag2"],
                },
                'removing spaces failed when there were no spaces',
            ),
            (
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"],
                },
                False,
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/Sub-Tag1", "Tag1/Sub-Tag1/Sub-Sub-Tag1", "Tag2"],
                },
                'removing spaces failed when there were spaces',
            ),
            (
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"],
                },
                True,
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"],
                },
                'removing spaces failed when NOT required',
            ),
        ]
        for test_set in test_data_sets:
            metadata, spaces_in_tags, expected, failure_message = test_set
            with self.subTest(msg=f'Testing {test_set}'):
                processor = self.metadata_processor
                processor._metadata = metadata
                processor._spaces_in_tags = spaces_in_tags
                processor.remove_tag_spaces_if_required()

                self.assertEqual(expected, processor.metadata, failure_message)

    def test_split_tags_if_required(self):
        """Check that hierarchical tags are split on '/' only when splitting is enabled.

        Each data set is (metadata, value for _split_tags, expected metadata,
        failure message). Only the 'tags' entry is compared, and it is compared
        order-independently by sorting both sides.
        """
        test_data_sets = [
            (
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/SubTag1", "Tag1/SubTag1/SubSubTag1", "Tag2"],
                },
                True,
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "SubTag1", "SubSubTag1", "Tag2"],
                },
                'splitting tags with no spaces failed',
            ),
            (
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"],
                },
                True,
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Sub Tag1", "Sub Sub Tag1", "Tag2"],
                },
                'splitting tags with spaces failed',
            ),
            (
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"],
                },
                False,
                {
                    'title': 'My Title',
                    'ctime': '1234',
                    'mtime': '5678',
                    'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"],
                },
                'splitting tags failed when NOT required',
            ),
        ]
        for test_set in test_data_sets:
            metadata, split_tags, expected, failure_message = test_set
            with self.subTest(msg=f'Testing {test_set}'):
                self.metadata_processor._metadata = metadata
                self.metadata_processor._split_tags = split_tags
                self.metadata_processor.split_tags_if_required()
                # assertEqual (rather than assertTrue on an == expression) so a
                # failure reports the two tag lists instead of 'False is not true'.
                self.assertEqual(
                    sorted(expected['tags']),
                    sorted(self.metadata_processor.metadata['tags']),
                    failure_message,
                )

    def test_parse_dict_metadata(self):
        """Check that parse_dict_metadata keeps only the entries named in the schema.

        Each data set is (schema, raw metadata, expected metadata, failure
        message). A final case checks that tags are split and have their spaces
        replaced when both tag options are configured.
        """
        test_data_sets = [
            (
                ['title', 'ctime', 'mtime'],
                {'title': 'My Title', 'ctime': '1234', 'mtime': '5678'},
                {'title': 'My Title', 'ctime': '1234', 'mtime': '5678'},
                'generating selected metadata failed for clean data',
            ),
            (
                ['title', 'mtime'],
                {'title': 'My Title', 'ctime': '1234', 'content': 'my_content'},
                {'title': 'My Title'},
                'generating metadata with "content" in metadata',
            ),
            (
                ['title', 'tags', 'ctime', 'mtime'],
                {'title': 'My Title', 'ctime': '1234'},
                {'title': 'My Title', 'ctime': '1234'},
                'generating selected metadata failed for meta data missing one of the schema keys',
            ),
            (
                [],
                {'title': 'My Title', 'ctime': '1234'},
                {},
                'generating selected metadata failed for meta data missing a schema tag',
            ),
            (
                ['title', 'tags', 'ctime', 'mtime'],
                {},
                {},
                'generating selected metadata failed for empty metadata',
            ),
        ]

        for test_set in test_data_sets:
            schema, raw_metadata, expected, failure_message = test_set
            with self.subTest(msg=f'Testing {test_set}'):
                self.metadata_processor._metadata = {}
                self.metadata_processor._metadata_schema = schema
                self.metadata_processor.parse_dict_metadata(raw_metadata)
                # assertEqual reports both dictionaries when they differ.
                self.assertEqual(expected,
                                 self.metadata_processor.metadata,
                                 failure_message)

        self.metadata_processor._split_tags = True
        self.metadata_processor._spaces_in_tags = False
        self.metadata_processor._metadata = {}
        self.metadata_processor._metadata_schema = ['tags', 'content']
        raw_metadata = {
            'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"]
        }
        expected_result = {
            'tags': ["Tag1", "Sub-Tag1", "Sub-Sub-Tag1", "Tag2"]
        }
        self.metadata_processor.parse_dict_metadata(raw_metadata)

        # Tag order is not part of the expectation, so compare sorted lists.
        self.assertEqual(
            sorted(expected_result['tags']),
            sorted(self.metadata_processor.metadata['tags']),
            'generating metadata with tags failed')

    def test_add_metadata_html_to_content(self):
        content = '<head><title>-</title></head>'
        self.metadata_processor._metadata = {'title': 'My Title'}
        new_content = self.metadata_processor.add_metadata_html_to_content(
            content)
        self.assertEqual(
            '<head><title>My Title</title><meta title="My Title"/></head>',
            new_content, 'title and meta data inserted incorrectly')

        content = """<!DOCTYPE html>

<html lang="" xml:lang="" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta content="pandoc" name="generator"/>
<meta content="width=device-width, initial-scale=1.0, user-scalable=yes" name="viewport"/>
<title>-</title>
<style>
    html {
      line-height: 1.5;
      font-family: Georgia, serif;
      font-size: 20px;
      color: #1a1a1a;
      background-color: #fdfdfd;
    }
</style>
</head></html>
"""
        self.metadata_processor._metadata = {
            'test': 'test-meta-content',
            'test2': 'this is test2'
        }

        expected_result = """<!DOCTYPE html>

<html lang="" xml:lang="" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta content="pandoc" name="generator"/>
<meta content="width=device-width, initial-scale=1.0, user-scalable=yes" name="viewport"/>
<title>-</title>
<style>
    html {
      line-height: 1.5;
      font-family: Georgia, serif;
      font-size: 20px;
      color: #1a1a1a;
      background-color: #fdfdfd;
    }
</style>
<meta test="test-meta-content"/><meta test2="this is test2"/></head></html>
"""

        new_content = self.metadata_processor.add_metadata_html_to_content(
            content)

        self.assertEqual(expected_result, new_content,
                         'meta data inserted incorrectly')

        content = '<title>-</title>'
        self.metadata_processor._metadata = {
            'test': 'test-meta-content',
            'test2': 'this is test2'
        }
        new_content = self.metadata_processor.add_metadata_html_to_content(
            content)
        self.assertEqual(
            '<title>-</title>', new_content,
            'meta data inserted incorrectly when there is no head')

        content = '<head></head><h1>hello</h1>'
        self.metadata_processor._metadata = {
            'test': 'test-meta-content',
            'test2': 'this is test2'
        }
        new_content = self.metadata_processor.add_metadata_html_to_content(
            content)
        self.assertEqual(
            '<head><meta test="test-meta-content"/><meta test2="this is test2"/></head><h1>hello</h1>',
            new_content, 'meta data inserted incorrectly')

        content = '<head></head><h1>hello</h1>'
        self.metadata_processor._metadata = {}
        new_content = self.metadata_processor.add_metadata_html_to_content(
            content)
        self.assertEqual('<head></head><h1>hello</h1>', new_content,
                         'meta data inserted incorrectly')

        content = '<title>-</title>'
        self.metadata_processor._metadata = {
            'test': 'test-meta-content',
            'test2': 'this is test2'
        }
        new_content = self.metadata_processor.add_metadata_html_to_content(
            content)
        self.assertEqual('<title>-</title>', new_content,
                         'meta data inserted incorrectly')

        self.metadata_processor._conversion_settings.markdown_conversion_input = 'pandoc_markdown'
        content = '<head><title>-</title></head>'
        self.metadata_processor._metadata = {
            'test': 'test-meta-content',
            'test2': 'this is test2'
        }
        new_content = self.metadata_processor.add_metadata_html_to_content(
            content)
        self.assertEqual(
            '<head><title>-</title><meta test="test-meta-content"/><meta test2="this is test2"/></head>',
            new_content, 'meta data inserted incorrectly')

    def test_parse_html_meta_data(self):
        """Check which <meta> attributes parse_html_metadata keeps for several schemas.

        Each data set is (schema, html, expected metadata, failure message).
        """
        test_data_sets = [
            (
                ['title', 'creation_time'],
                '<head><meta title="this is test2"/><meta charset="utf8"/><meta content="my_content"/><meta creation_time="test-meta-content"/></head>',
                {'title': 'this is test2', 'creation_time': 'test-meta-content'},
                'meta data not parsed correctly',
            ),
            (
                ['title', 'creation_time'],
                '<meta title="this is test2"/><meta creation_time="test-meta-content"/>',
                {},
                'meta data not ignored if no head section',
            ),
            (
                ['title', 'creation_time'],
                '<head><meta title="this is test2"/><meta not_valid="not_in_schema"/></head>',
                {'title': 'this is test2'},
                'meta data not parsed correctly when meta not in schema present',
            ),
            (
                [],
                '<head><meta title="this is test2"/><meta not_valid="not_in_schema"/></head>',
                {},
                'meta data not parsed correctly when there is no schema',
            ),
        ]
        for test_set in test_data_sets:
            schema, html, expected, failure_message = test_set
            with self.subTest(msg=f'Testing paring of html for meta tags {test_set}'):
                processor = self.metadata_processor
                processor._conversion_settings.metadata_schema = [
                    'title', 'ctime', 'mtime', 'tag', 'content',
                ]
                processor._metadata = {}
                # The processor's own schema attribute is set per data set.
                processor._metadata_schema = schema
                processor.parse_html_metadata(html)
                self.assertEqual(expected, processor.metadata, failure_message)

    def test_format_tag_metadata_if_required(self):
        """Check tag formatting with splitting enabled and spaces in tags disabled."""
        processor = self.metadata_processor
        processor._split_tags = True
        processor._spaces_in_tags = False
        processor._metadata = {
            'tags': ["Tag1", "Tag1/Sub Tag1", "Tag1/Sub Tag1/Sub Sub Tag1", "Tag2"],
        }

        processor.format_tag_metadata_if_required()

        # Compared as sorted lists, so the order of the resulting tags is not checked.
        expected_tags = sorted(["Tag1", "Sub-Tag1", "Sub-Sub-Tag1", "Tag2"])
        actual_tags = sorted(processor.metadata['tags'])
        self.assertEqual(expected_tags, actual_tags,
                         'formatting tags if required failed')

    def test_parse_md_metadata(self):
        """Check front matter parsing of markdown content against several schemas.

        Each data set is (markdown, schema, expected returned content, content
        failure message, expected metadata, metadata failure message).
        """
        front_matter_doc = '---\nexcerpt: tl;dr\nlayout: post\ntitle: Hello, world!\n---\n\nHello'
        test_data_sets = [
            (
                'Hello',
                ['title', 'tag', 'ctime', 'mtime'],
                'Hello',
                'no meta data, content was incorrect',
                {},
                'no meta data to parse, resulted in having metadata',
            ),
            (
                front_matter_doc,
                ['title', 'tag', 'ctime', 'mtime'],
                'Hello',
                'with md metadata, content was incorrect',
                {'title': 'Hello, world!'},
                'with md metadata to parse, incorrect metadata',
            ),
            (
                front_matter_doc,
                # A schema holding a single empty string: every entry is expected back.
                [''],
                'Hello',
                'with md metadata and empty schema, content was incorrect',
                {'excerpt': 'tl;dr', 'layout': 'post', 'title': 'Hello, world!'},
                'with md metadata and empty schema, incorrect metadata',
            ),
            (
                '---\nexcerpt: tl;dr\nlayout: post\ntitle: Hello, world!\ncontent: my content\n---\n\nHello',
                ['title', 'layout', 'ctime', 'mtime', 'content'],
                'Hello',
                'with md metadata, content was incorrect',
                {'title': 'Hello, world!', 'layout': 'post', 'content': 'my content'},
                'with md metadata to parse, incorrect metadata',
            ),
            (
                front_matter_doc,
                ['ctime', 'mtime'],
                'Hello',
                'with md metadata and no vlaid matches in schema, content was incorrect',
                {},
                'with md metadata and no vlaid matches in schema, incorrect metadata',
            ),
            (
                front_matter_doc,
                # A schema with no entries at all: no metadata is expected back.
                [],
                'Hello',
                'with md metadata and empty schema, content was incorrect',
                {},
                'with md metadata and empty schema, incorrect metadata',
            ),
        ]

        for test_set in test_data_sets:
            (md_string, schema, expected_content, content_message,
             expected_metadata, metadata_message) = test_set
            with self.subTest(
                    msg=f'Testing parsing meta data from MD {test_set}'):
                self.metadata_processor._metadata = {}
                self.metadata_processor._metadata_schema = schema
                new_content = self.metadata_processor.parse_md_metadata(md_string)
                self.assertEqual(expected_content, new_content, content_message)
                # assertEqual reports both dictionaries when they differ.
                self.assertEqual(expected_metadata,
                                 self.metadata_processor.metadata,
                                 metadata_message)
 def setUp(self) -> None:
     """Create a MetaDataProcessor from settings using the 'gfm' quick setting."""
     settings = ConversionSettings()
     settings.set_quick_setting('gfm')
     self.conversion_settings = settings
     self.metadata_processor = MetaDataProcessor(settings)
Пример #17
0
class TestMDToMDConverter(unittest.TestCase):
    def setUp(self):
        """Build an MDToMDConverter on 'gfm' quick settings with its own MetaDataProcessor."""
        settings = ConversionSettings()
        settings.set_quick_setting('gfm')
        self.conversion_settings = settings

        file_names = (
            'not_existing.md',
            'some_markdown-old-1.md',
            'renaming source file failed',
        )
        files_to_convert = [Path(file_name) for file_name in file_names]

        self.file_converter = MDToMDConverter(settings, files_to_convert)
        self.file_converter._metadata_processor = MetaDataProcessor(settings)

    def test_add_meta_data_if_required(self):
        """Check that metadata is added to the post processed content as front matter.

        Each data set is (content, metadata, markdown conversion input, expected
        content, failure message).
        """
        test_data_sets = [
            (
                'Hello',
                {},
                'gfm',
                'Hello',
                'no meta data, content was incorrect',
            ),
            (
                'Hello',
                {'excerpt': 'tl;dr', 'layout': 'post', 'title': 'Hello, world!'},
                'gfm',
                '---\nexcerpt: tl;dr\nlayout: post\ntitle: Hello, world!\n---\n\nHello',
                'good meta string and content failed',
            ),
            (
                'Hello',
                {'excerpt': 'tl;dr', 'layout': 'post', 'title': 'Hello, world!'},
                'pandoc_markdown',
                '---\nexcerpt: tl;dr\nlayout: post\ntitle: Hello, world!\n---\n\nHello',
                'good meta string and content failed',
            ),
        ]

        for test_set in test_data_sets:
            content, metadata, markdown_input, expected, failure_message = test_set
            with self.subTest(msg=f'Testing {test_set}'):
                converter = self.file_converter
                converter._post_processed_content = content
                converter._metadata_processor._metadata = metadata
                converter._conversion_settings.markdown_conversion_input = markdown_input
                converter.add_meta_data_if_required()
                self.assertEqual(expected,
                                 converter._post_processed_content,
                                 failure_message)

    def test_parse_metadata_if_required(self):
        """Check that front matter is parsed into metadata and removed from the content.

        Each data set is (pre processed content, markdown conversion input,
        expected metadata, metadata failure message, expected remaining content,
        content failure message).
        """
        test_data_sets = [
            (
                '---\nexcerpt: tl;dr\nlayout: post\ntitle: Hello, world!\n---\n\nHello',
                'gfm',
                {'title': 'Hello, world!'},
                'good meta string failed',
                'Hello',
                'good meta string failed',
            ),
            (
                'Hello',
                'gfm',
                {},
                'no meta string failed',
                'Hello',
                'no meta string failed',
            ),
            (
                '---\nthis :is:nonsense\nmore\nnonsense\n---\n\nHello',
                'gfm',
                {},
                'bad meta data failed',
                'Hello',
                'bad meta data failed',
            ),
            (
                '---\nexcerpt: tl;dr\nlayout: post\ntitle: Hello, world!\n---\n\nHello',
                'pandoc_markdown',
                {'title': 'Hello, world!'},
                'good meta failed with pandoc_markdown',
                'Hello',
                'good meta with pandoc_markdown failed',
            ),
        ]
        for test_set in test_data_sets:
            (content, markdown_input, expected_metadata, metadata_message,
             expected_content, content_message) = test_set
            with self.subTest(msg=f'Testing {test_set}'):
                converter = self.file_converter
                converter._pre_processed_content = content
                converter._conversion_settings.markdown_conversion_input = markdown_input
                converter.parse_metadata_if_required()
                self.assertEqual(expected_metadata,
                                 converter._metadata_processor.metadata,
                                 metadata_message)
                self.assertEqual(expected_content,
                                 converter._pre_processed_content,
                                 content_message)

    def test_rename_target_file_if_already_exists(self):
        """Check the renaming of a target file that already exists in the export folder.

        Each data set is (target file name, name of a second file that also
        exists, name expected to exist afterwards, failure message). The last
        section checks that nothing is created when the target does not exist.
        """
        test_strings = [
            ('some_markdown.md', 'not_existing.md', 'some_markdown-old-1.md',
             'renaming source file failed'),
            ('some_markdown.md', 'some_markdown-old-1.md',
             'some_markdown-old-2.md',
             'renaming for existing old file failed'),
        ]
        for target_name, other_existing_name, expected_name, failure_message in test_strings:
            # Label the sub test with the file name that varies between the data
            # sets; the target name is the same in both and cannot tell them apart.
            with self.subTest(
                    msg=f'Testing when existing old set to {other_existing_name}'):
                with TempDirectory() as d:
                    # the order here is a little messy but it has to be like this not have the export folder renamed
                    # as if it is renamed then the new folder is empty and the rename existing file never runs

                    # set source to an existing folder
                    self.file_converter._conversion_settings.source = Path(
                        d.path)
                    # set export folder to non-existing folder so is an empty folder
                    self.file_converter._conversion_settings.export_folder = Path(
                        d.path, 'export')
                    # make the export folder
                    Path(d.path, 'export').mkdir(exist_ok=True)
                    # change the source to the export folder to where the source file will be
                    self.file_converter._conversion_settings.source = Path(
                        d.path, 'export')
                    # put the source file in the folder
                    source_file = Path(d.path, 'export', target_name)
                    source_file.touch()
                    source_file_old_exists = Path(d.path, 'export',
                                                  other_existing_name)
                    source_file_old_exists.touch()
                    self.assertTrue(source_file.exists())
                    self.assertTrue(source_file_old_exists.exists())
                    self.file_converter._file = source_file
                    self.file_converter.rename_target_file_if_it_already_exists()
                    self.assertTrue(
                        Path(d.path, 'export', expected_name).exists(),
                        failure_message)

        with TempDirectory() as d:
            self.file_converter._file = Path('does_not_exist.md')
            self.file_converter._conversion_settings.source = Path(d.path)
            self.file_converter._conversion_settings.export_folder = Path(
                d.path)
            self.file_converter.rename_target_file_if_it_already_exists()
            # Neither the target nor any renamed variant may appear on disk.
            for unexpected_name in ('does_not_exist.md',
                                    'does_not_exist-old.md',
                                    'does_not_exist-old-1.md'):
                self.assertFalse(
                    Path(d.path, unexpected_name).exists(),
                    'failed to manage a not existing file name',
                )

    def test_pre_process_obsidian_image_links_if_required(self):
        """Check the pre processing of image links for obsidian and non obsidian input.

        Each data set is (markdown conversion input, content, expected content,
        failure message).
        """
        test_strings = [
            (
                'obsidian',
                '![|600](filepath/image.png)',
                '<img src="filepath/image.png" width="600" />',
                'obsidian link to gfm failed',
            ),
            (
                'obsidian',
                '![](filepath/image.png)',
                '![](filepath/image.png)',
                'markdown std link not left unchanged',
            ),
            (
                'obsidian',
                '![|some-text](filepath/image.png)',
                '<img alt="|some-text" src="filepath/image.png" />',
                'markdown std with pipe and text link',
            ),
            (
                'commonmark',
                '![](filepath/image.png)',
                '![](filepath/image.png)',
                'non obsidian input image incorrectly changed',
            ),
        ]

        for markdown_input, content, expected, failure_message in test_strings:
            with self.subTest(
                    msg=f'Testing image link format {content} conversion'):
                self.conversion_settings.markdown_conversion_input = markdown_input
                converter = self.file_converter
                converter._pre_processed_content = content
                converter.pre_process_obsidian_image_links_if_required()
                self.assertEqual(expected,
                                 converter._pre_processed_content,
                                 failure_message)

    def test_pre_process_content(self):
        """Check image link handling when run through the full pre_process_content step.

        Each data set is (markdown conversion input, file content, expected pre
        processed content, failure message). Source and export folder are both
        set to a temporary directory that already holds the file.
        """
        test_strings = [
            (
                'obsidian',
                '![|600](filepath/image.png)',
                '<img src="filepath/image.png" width="600" />',
                'obsidian link to gfm failed',
            ),
            (
                'obsidian',
                '![](filepath/image.png)',
                '![](filepath/image.png)',
                'markdown std link not left unchanged',
            ),
            (
                'obsidian',
                '![|some-text](filepath/image.png)',
                '<img alt="|some-text" src="filepath/image.png" />',
                'markdown std with pipe and text link',
            ),
            (
                'commonmark',
                '![](filepath/image.png)',
                '![](filepath/image.png)',
                'non obsidian input image incorrectly changed',
            ),
        ]

        for markdown_input, content, expected, failure_message in test_strings:
            with self.subTest(
                    msg=f'Testing image link format {content} conversion'):
                with TempDirectory() as d:
                    source_file = Path(d.path, 'some_markdown.md')
                    source_file.touch()
                    self.assertTrue(source_file.exists())

                    converter = self.file_converter
                    converter._file = source_file
                    self.conversion_settings.markdown_conversion_input = markdown_input
                    converter._file_content = content
                    converter._conversion_settings.source = Path(d.path)
                    converter._conversion_settings.export_folder = Path(d.path)

                    converter.pre_process_content()

                    self.assertEqual(expected,
                                     converter._pre_processed_content,
                                     failure_message)

    def test_pre_process_content2_rename_existing_file_and_its_link_in_content(
            self):
        """Check that pre processing renames an existing target file and updates its link.

        Source and export folder are the same temporary directory, which already
        contains 'a-file.md'. After pre processing, 'a-file-old-1.md' must exist
        on disk and the pre processed content must refer to that new name.
        """
        self.file_converter._file_content = '[existing_md](a-file.md)'
        self.file_converter._metadata_schema = ['title']
        self.file_converter._file = Path('a-file.md')
        self.file_converter._conversion_settings.export_format = 'gfm'
        self.file_converter._conversion_settings.conversion_input = 'markdown'
        with TempDirectory() as d:
            self.file_converter._conversion_settings.source = Path(d.path)
            self.file_converter._conversion_settings.export_folder = Path(
                d.path)
            Path(d.path, 'a-file.md').touch()

            self.file_converter.pre_process_content()

            # TestCase assertions instead of a bare assert: they are not removed
            # under 'python -O' and match the rest of this test class.
            self.assertTrue(
                Path(d.path, 'a-file-old-1.md').exists(),
                'Failed to rename the existing file',
            )
            # assertIn reports the searched content when the name is missing.
            self.assertIn(
                'a-file-old-1.md',
                self.file_converter._pre_processed_content,
                'Failed to rename existing file link placeholders',
            )

    def test_post_process_obsidian_image_links_if_required(self):
        """Check conversion of html image tags to obsidian links for 'obsidian' export.

        Each data set is (post processed content, expected content, failure
        message).
        """
        test_strings = [
            (
                '<img src="filepath/image.png" width="600">',
                '![|600](filepath/image.png)',
                'link not converted to obsidian correctly',
            ),
            (
                '<img src="filepath/image.png" width="600"/>',
                '![|600](filepath/image.png)',
                'link with closing forward slash not converted to obsidian correctly',
            ),
            (
                '![](filepath/image.png)',
                '![](filepath/image.png)',
                'std markdown image link not left alone',
            ),
        ]
        self.conversion_settings.export_format = 'obsidian'

        for content, expected, failure_message in test_strings:
            with self.subTest(
                    msg=f'Testing image link format {content} conversion'):
                converter = self.file_converter
                converter._post_processed_content = content
                converter.post_process_obsidian_image_links_if_required()
                self.assertEqual(expected,
                                 converter._post_processed_content,
                                 failure_message)

    def test_read_file(self):
        """Check that read_file loads the file's text into _file_content."""
        text = 'hello\nworld!'
        with TempDirectory() as tmp:
            markdown_path = Path(tmp.path) / 'some_markdown.md'
            markdown_path.write_text(text)
            converter = self.file_converter
            converter._file = markdown_path

            converter.read_file()

            self.assertEqual(text,
                             converter._file_content,
                             'failed to read file content')

    def test_convert_content(self):
        """Check that an HTML heading is converted to a markdown heading."""
        converter = self.file_converter
        converter._pre_processed_content = '<h1>Header 1</h1>'

        converter.convert_content()

        converted = converter._converted_content
        self.assertEqual('# Header 1\n', converted,
                         'failed to convert content')

    def test_write_post_processed_content(self):
        """Check that post-processed content is written out as 'test.md'.

        The converter's current file is ``test.txt``; the test expects the
        written output to be found at ``test.md`` in the export folder.
        """
        content = '# Header 1\n'
        with TempDirectory() as tmp:
            folder = Path(tmp.path)
            converter = self.file_converter
            converter._file = folder / 'test.txt'
            converter._post_processed_content = content
            # Source and export folder are the same temporary directory.
            converter._conversion_settings.source = Path(tmp.path)
            converter._conversion_settings.export_folder = Path(tmp.path)

            converter.write_post_processed_content()

            written = (folder / 'test.md').read_text()
            self.assertEqual(content, written, 'Failed to write content')

    def test_post_process_content(self):
        """Check post_process_content output across three metadata setups.

        Stage 1 uses metadata ``{'test': 'data'}`` with content 'Hello'.
        Stages 2 and 3 swap the metadata to ``{'title': 'My Title'}`` and
        differ only in the ``markdown_conversion_input`` setting.  In every
        stage the expected result is the metadata between ``---`` lines,
        a blank line, then 'Hello'.
        """
        converter = self.file_converter

        # Stage 1: plain converted content plus a single metadata entry.
        converter._converted_content = 'Hello'
        converter._metadata_processor._metadata = {'test': 'data'}
        converter._conversion_settings.markdown_conversion_input = 'gfm'
        converter.post_process_content()

        self.assertEqual('---\ntest: data\n---\n\nHello',
                         converter._post_processed_content,
                         'failed to post process content 1')

        # Stages 2 and 3 are identical apart from the input format and
        # the failure message.
        # NOTE(review): these stages set _pre_processed_content, not
        # _converted_content - confirm which one post_process_content reads.
        later_stages = (
            ('pandoc_markdown', 'failed to post process content 2'),
            ('gfm', 'failed to post process content 3'),
        )
        for input_format, failure_msg in later_stages:
            converter._pre_processed_content = '---\ntest: data\n---\n\nHello'
            converter._metadata_processor._metadata = {'title': 'My Title'}
            converter._conversion_settings.markdown_conversion_input = input_format
            converter.post_process_content()

            self.assertEqual('---\ntitle: My Title\n---\n\nHello',
                             converter._post_processed_content,
                             failure_msg)

    def test_set_out_put_extension(self):
        """Check the file suffix chosen for two export formats.

        The settings prepared by the fixture are expected to give '.md';
        fresh settings with the 'html' quick setting are expected to give
        '.html'.
        """
        converter = self.file_converter

        md_suffix = file_mover.get_file_suffix_for(
            converter._conversion_settings.export_format)
        self.assertEqual('.md', md_suffix,
                         'failed to select correct md extension')

        # Replace the settings entirely before switching to html.
        converter._conversion_settings = ConversionSettings()
        converter._conversion_settings.set_quick_setting('html')
        html_suffix = file_mover.get_file_suffix_for(
            converter._conversion_settings.export_format)
        self.assertEqual('.html', html_suffix,
                         'failed to select correct html extension')

    def test_convert(self):
        """Run convert_note on a file using the 'obsidian' quick setting.

        The source file holds an HTML ``<img>`` tag with a width; the test
        expects the post-processed content to be the ``![|600](...)`` link
        followed by a newline.
        """
        converter = self.file_converter
        converter._conversion_settings = ConversionSettings()
        converter._conversion_settings.set_quick_setting('obsidian')
        with TempDirectory() as tmp:
            export_dir = Path(tmp.path, 'export')
            converter._conversion_settings.source = Path(tmp.path)
            converter._conversion_settings.export_folder = export_dir
            export_dir.mkdir()
            note_path = Path(tmp.path) / 'some_markdown.md'
            note_path.write_text(
                '<img src="filepath/image.png" width="600">')

            converter.convert_note(note_path)

            self.assertEqual('![|600](filepath/image.png)\n',
                             converter._post_processed_content,
                             'failed to convert file')