def test_article_with_footnote(self):
    # Reads the footnote fixture with default settings and checks both the
    # rendered HTML (footnote references plus the footnote list) and every
    # metadata entry the reader returned.
    wanted_html = (
        '<p>This is some content'
        '<sup id="fnref:1"><a class="footnote-ref" href="#fn:1" '
        'rel="footnote">1</a></sup>'
        ' with some footnotes'
        '<sup id="fnref:footnote"><a class="footnote-ref" '
        'href="#fn:footnote" rel="footnote">2</a></sup></p>\n'
        '<div class="footnote">\n'
        '<hr />\n<ol>\n<li id="fn:1">\n'
        '<p>Numbered footnote '
        '<a class="footnote-backref" href="#fnref:1" rev="footnote" '
        'title="Jump back to footnote 1 in the text">↩</a></p>\n'
        '</li>\n<li id="fn:footnote">\n'
        '<p>Named footnote '
        '<a class="footnote-backref" href="#fnref:footnote" rev="footnote" '
        'title="Jump back to footnote 2 in the text">↩</a></p>\n'
        '</li>\n</ol>\n</div>'
    )
    wanted_meta = {
        'title': 'Article with markdown containing footnotes',
        'summary': ('<p>Summary with <strong>inline</strong> markup '
                    '<em>should</em> be supported.</p>'),
        'date': datetime.datetime(2012, 10, 31),
        'slug': 'article-with-markdown-containing-footnotes',
    }

    md_reader = readers.MarkdownReader(settings=get_settings())
    html, meta = md_reader.read(
        _path('article_with_markdown_and_footnote.md'))

    self.assertEqual(html, wanted_html)
    # Every key the reader produced must be present in the expectation and
    # carry the same value; the key doubles as the failure message.
    for name in meta:
        self.assertEqual(meta[name], wanted_meta[name], name)
def test_article_with_metadata(self):
    # Checks the metadata parsed from an ASCII article and from an article
    # whose title, summary, category and tags are non-ASCII.
    md_reader = readers.MarkdownReader(settings=get_settings())

    _, ascii_meta = md_reader.read(_path('article_with_md_extension.md'))
    ascii_wanted = {
        'category': 'test',
        'title': 'Test md File',
        'summary': '<p>I have a lot to test</p>',
        'date': SafeDatetime(2010, 12, 2, 10, 14),
        'modified': SafeDatetime(2010, 12, 2, 10, 20),
        'tags': ['foo', 'bar', 'foobar'],
    }
    self.assertDictHasSubset(ascii_meta, ascii_wanted)

    _, unicode_meta = md_reader.read(
        _path('article_with_markdown_and_nonascii_summary.md'))
    unicode_wanted = {
        'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定',
        'summary':
            '<p>パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。</p>',
        'category': '指導書',
        'date': SafeDatetime(2012, 12, 20),
        'modified': SafeDatetime(2012, 12, 22),
        'tags': ['パイソン', 'マック'],
        'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8',
    }
    self.assertDictHasSubset(unicode_meta, unicode_wanted)
def test_article_with_metadata(self):
    # Checks every metadata entry returned for an ASCII article and for an
    # article with non-ASCII title, summary, category and tags.
    md_reader = readers.MarkdownReader({})

    _, ascii_meta = md_reader.read(_path('article_with_md_extension.md'))
    ascii_wanted = {
        'category': 'test',
        'title': 'Test md File',
        'summary': '<p>I have a lot to test</p>',
        'date': datetime.datetime(2010, 12, 2, 10, 14),
        'tags': ['foo', 'bar', 'foobar'],
    }
    # Each parsed key must exist in the expectation with the same value.
    for name in ascii_meta:
        self.assertEqual(ascii_meta[name], ascii_wanted[name], name)

    _, unicode_meta = md_reader.read(
        _path('article_with_markdown_and_nonascii_summary.md'))
    unicode_wanted = {
        'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定',
        'summary':
            '<p>パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。</p>',
        'category': '指導書',
        'date': datetime.datetime(2012, 12, 20),
        'tags': ['パイソン', 'マック'],
        'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8',
    }
    for name in unicode_meta:
        self.assertEqual(unicode_meta[name], unicode_wanted[name], name)
def test_article_with_file_extensions(self):
    # One fixture per file extension (.md, .mkd, .markdown, .mdown); each is
    # read through the same MarkdownReader and its rendered HTML compared
    # with the expected markup, in the order listed.
    cases = (
        ('article_with_md_extension.md',
         "<h1>Test Markdown File Header</h1>\n"
         "<h2>Used for pelican test</h2>\n"
         "<p>The quick brown fox jumped over the lazy dog's back.</p>"),
        ('article_with_mkd_extension.mkd',
         "<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
         " test</h2>\n<p>This is another markdown test file. Uses"
         " the mkd extension.</p>"),
        ('article_with_markdown_extension.markdown',
         "<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
         " test</h2>\n<p>This is another markdown test file. Uses"
         " the markdown extension.</p>"),
        ('article_with_mdown_extension.mdown',
         "<h1>Test Markdown File Header</h1>\n<h2>Used for pelican"
         " test</h2>\n<p>This is another markdown test file. Uses"
         " the mdown extension.</p>"),
    )

    md_reader = readers.MarkdownReader(settings=get_settings())
    for fixture_name, wanted_html in cases:
        html, _ = md_reader.read(_path(fixture_name))
        self.assertEqual(html, wanted_html)
def test_empty_file_with_bom(self):
    # Reading the 'empty_with_bom.md' fixture must produce an empty
    # metadata dict and an empty content string.
    md_reader = readers.MarkdownReader(settings=get_settings())
    html, meta = md_reader.read(_path('empty_with_bom.md'))
    self.assertEqual(meta, {})
    self.assertEqual(html, '')
def read(self, filename):
    """Knit an R Markdown file and parse the result with MarkdownReader.

    Settings consulted on ``self.settings``:

    * ``RMD_READER_KNITR_QUIET`` (default ``True``) -- passed to knitr as
      ``quiet``.
    * ``RMD_READER_KNITR_ENCODING`` (default ``'UTF-8'``) -- passed to knitr
      as ``encoding``.
    * ``RMD_READER_CLEANUP`` (default ``True``) -- remove the intermediate
      ``.aux`` file once it has been read.
    * ``RMD_READER_RENAME_PLOT`` (default ``'chunklabel'``) -- plot naming
      mode; ``'chunklabel'`` and ``'directory'`` are acted upon, any other
      string leaves knitr's naming untouched.  A boolean is accepted for
      backward compatibility but logged as an error.

    Returns the ``(content, metadata)`` pair produced by
    ``MarkdownReader.read`` for the knitted file.
    """
    quiet = self.settings.get('RMD_READER_KNITR_QUIET', True)
    encoding = self.settings.get('RMD_READER_KNITR_ENCODING', 'UTF-8')
    cleanup = self.settings.get('RMD_READER_CLEANUP', True)
    rename_plot = self.settings.get('RMD_READER_RENAME_PLOT', 'chunklabel')

    # Map a legacy boolean value onto the string modes, complaining loudly.
    if isinstance(rename_plot, bool):
        logger.error(
            "RMD_READER_RENAME_PLOT takes a string value (either chunklabel or directory), please see the readme."
        )
        if rename_plot:
            rename_plot = 'chunklabel'
            logger.error("Defaulting to chunklabel")
        else:
            rename_plot = 'disabled'
            logger.error("Disabling plot renaming")

    logger.debug("RMD_READER_KNITR_QUIET = %s", quiet)
    logger.debug("RMD_READER_KNITR_ENCODING = %s", encoding)
    logger.debug("RMD_READER_CLEANUP = %s", cleanup)
    logger.debug("RMD_READER_RENAME_PLOT = %s", rename_plot)

    # replace single backslashes with double backslashes
    # NOTE(review): presumably so Windows paths survive on the R side --
    # confirm.
    filename = filename.replace('\\', '\\\\')
    # parse Rmd file - generate md file
    md_filename = filename.replace('.Rmd', '.aux').replace('.rmd', '.aux')

    if rename_plot in ('chunklabel', 'directory'):
        if rename_plot == 'chunklabel':
            # Label unnamed chunks after the source file's base name.
            chunk_label = os.path.splitext(os.path.basename(filename))[0]
            logger.debug('Chunk label: %s', chunk_label)
        else:
            # 'directory': keep the generic label but prefix figure paths
            # with the source path relative to the content directory.
            chunk_label = 'unnamed-chunk'
            content_path = self.settings.get(
                'PATH', '%s/content' % settings.DEFAULT_CONFIG.get('PATH'))
            src_name = os.path.splitext(
                os.path.relpath(filename, content_path))[0]
            idx = KNITR.opts_chunk.names.index('set')
            knitroptschunk = {
                'fig.path': '%s-' % os.path.join(FIG_PATH, src_name)
            }
            KNITR.opts_chunk[idx](
                **{str(k): v for k, v in knitroptschunk.items()})
            logger.debug('Figures path: %s, chunk label: %s',
                         knitroptschunk['fig.path'], chunk_label)
        # "{{filename}}" is escaped for str.format and reaches R as the
        # literal text "{filename}".
        R_OBJECTS.r('''
opts_knit$set(unnamed.chunk.label="{unnamed_chunk_label}")
render_markdown()
hook_plot <- knit_hooks$get('plot')
knit_hooks$set(plot=function(x, options) hook_plot(paste0("{{filename}}/", x), options))
        '''.format(unnamed_chunk_label=chunk_label))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        KNITR.knit(filename, md_filename, quiet=quiet, encoding=encoding)

    try:
        # read md file - create a MarkdownReader
        md_reader = readers.MarkdownReader(self.settings)
        content, metadata = md_reader.read(md_filename)
    finally:
        # remove md file, even when parsing failed, so no stale .aux file
        # is left next to the source
        if cleanup:
            os.remove(md_filename)
    return content, metadata
def test_article_with_mkd_extension(self):
    # A fixture with the .mkd extension must be rendered to the expected
    # HTML by MarkdownReader.
    wanted_html = (
        "<h1>Test Markdown File Header</h1>\n"
        "<h2>Used for pelican test</h2>\n"
        "<p>This is another markdown test file. Uses the mkd extension.</p>"
    )
    md_reader = readers.MarkdownReader({})
    html, _ = md_reader.read(_filename('article_with_mkd_extension.mkd'))
    self.assertEqual(html, wanted_html)
def test_duplicate_tags_or_authors_are_removed(self):
    # The fixture lists duplicate tags and authors; the parsed metadata
    # must contain each tag and each author exactly once, in this order.
    md_reader = readers.MarkdownReader(settings=get_settings())
    _, meta = md_reader.read(
        _path('article_with_duplicate_tags_authors.md'))
    wanted = {
        'tags': ['foo', 'bar', 'foobar'],
        'authors': ['Author, First', 'Author, Second'],
    }
    self.assertDictHasSubset(meta, wanted)
def test_article_with_md_extention(self):
    # A fixture with the .md extension must be rendered to the expected
    # HTML by MarkdownReader.
    wanted_html = (
        "<h1>Test Markdown File Header</h1>\n"
        "<h2>Used for pelican test</h2>\n"
        "<p>The quick brown fox jumped over the lazy dog's back.</p>"
    )
    md_reader = readers.MarkdownReader({})
    html, _ = md_reader.read(_filename('article_with_md_extension.md'))
    self.assertEqual(html, wanted_html)
def test_article_with_metadata(self):
    # Every metadata entry parsed from the .md fixture must match the
    # expectation; the key is passed as the failure message.
    reader = readers.MarkdownReader({})
    content, metadata = reader.read(_path('article_with_md_extension.md'))
    expected = {
        'category': 'test',
        'title': 'Test md File',
        'summary': '<p>I have a lot to test</p>',
        'date': datetime.datetime(2010, 12, 2, 10, 14),
        'tags': ['foo', 'bar', 'foobar'],
    }
    for key, value in metadata.items():
        # assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists on TestCase in Python 3.12+.
        self.assertEqual(value, expected[key], key)
def test_article_with_summary_metadata(self):
    # The 'summary' metadata value must come back as rendered HTML with
    # inline markup, for a single-line and for a multi-line summary.
    reader = readers.MarkdownReader({})

    content, metadata = reader.read(
        _filename('article_with_markdown_and_summary_metadata_single.md'))
    expected_summary = (
        u'<p>A single-line summary should be supported'
        u' as well as <strong>inline markup</strong>.</p>')
    # assertEqual rather than the deprecated assertEquals alias, which no
    # longer exists on TestCase in Python 3.12+.
    self.assertEqual(expected_summary, metadata['summary'], 'summary')

    content, metadata = reader.read(
        _filename('article_with_markdown_and_summary_metadata_multi.md'))
    expected_summary = (
        u'<p>A multi-line summary should be supported'
        u'\nas well as <strong>inline markup</strong>.</p>')
    self.assertEqual(expected_summary, metadata['summary'], 'summary')
def test_metadata_not_parsed_for_metadata(self):
    # With 'summary' as the only formatted field, a summary whose text
    # looks like a metadata line ("Test: ...") must be kept as summary
    # text and rendered as a paragraph.
    cfg = get_settings()
    cfg['FORMATTED_FIELDS'] = ['summary']
    md_reader = readers.MarkdownReader(settings=cfg)

    _, meta = md_reader.read(
        _path('article_with_markdown_and_nested_metadata.md'))
    wanted = {
        'title': 'Article with markdown and nested summary metadata',
        'summary': '<p>Test: This metadata value looks like metadata</p>',
    }
    self.assertDictHasSubset(meta, wanted)
def read_comment(comment_file, header=True):
    """Read a comment file and return its metadata or its body.

    Files ending in ``.rst`` are read with ``readers.RstReader`` and files
    ending in ``.md`` with ``readers.MarkdownReader``, both constructed
    with empty settings.  Any other name yields ``None``.

    :param comment_file: path of the comment file.
    :param header: when true return the parsed metadata, otherwise the
        rendered body.
    """
    if comment_file.endswith('.rst'):
        reader_cls = readers.RstReader
    elif comment_file.endswith('.md'):
        reader_cls = readers.MarkdownReader
    else:
        # Unsupported extension: nothing to read.
        return None

    body, metadata = reader_cls({}).read(comment_file)
    # return header or body
    return metadata if header else body
def read(self, filename):
    """Knit an R Markdown file and parse the result with MarkdownReader.

    The file is knitted by an R snippet run through ``robjects.r`` into an
    intermediate file whose name is ``filename`` with ``.Rmd``/``.rmd``
    replaced by ``.aux``.  That file is read with a ``MarkdownReader``
    built from ``self.settings`` and then removed.

    Returns the ``(content, metadata)`` pair from ``MarkdownReader.read``.
    """
    # replace single backslashes with double backslashes: the name is
    # embedded in an R string literal below
    filename = filename.replace('\\', '\\\\')
    # parse Rmd file - generate md file
    md_filename = filename.replace('.Rmd', '.aux').replace('.rmd', '.aux')

    knit_script = (
        " require(knitr);"
        " opts_knit$set(base.dir='{2}/content');"
        " knit('{0}', '{1}', quiet=TRUE, encoding='UTF-8'); "
    ).format(filename, md_filename, settings.DEFAULT_CONFIG.get('PATH'))
    robjects.r(knit_script)

    try:
        md_reader = readers.MarkdownReader(self.settings)
        content, metadata = md_reader.read(md_filename)
    finally:
        # Remove the intermediate file even when parsing failed, so no
        # stale .aux file is left next to the source.
        os.remove(md_filename)
    return content, metadata
def test_article_with_md_extension(self):
    # test to ensure the md extension is being processed by the correct reader
    reader = readers.MarkdownReader({})
    content, metadata = reader.read(_filename('article_with_md_extension.md'))
    expected = (
        "<h1>Test Markdown File Header</h1>\n"
        "<h2>Used for pelican test</h2>\n"
        "<p>The quick brown fox jumped over the lazy dog's back.</p>")
    self.assertEqual(content, expected)

    # Every parsed metadata entry must match; the key is the failure message.
    expected = {
        'category': 'test',
        'title': 'Test md File',
    }
    for key, value in metadata.items():
        # assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists on TestCase in Python 3.12+.
        self.assertEqual(value, expected[key], key)
def test_article_with_footnote(self):
    # Configures the footnotes extension with '-' as SEPARATOR, then checks
    # the rendered footnote markup and a subset of the parsed metadata
    # (including the multi-line 'multiline' field).
    cfg = get_settings()
    cfg['MARKDOWN']['extension_configs'][
        'markdown.extensions.footnotes'] = {'SEPARATOR': '-'}
    md_reader = readers.MarkdownReader(cfg)
    html, meta = md_reader.read(
        _path('article_with_markdown_and_footnote.md'))

    wanted_html = (
        '<p>This is some content'
        '<sup id="fnref-1"><a class="footnote-ref" href="#fn-1"'
        '>1</a></sup>'
        ' with some footnotes'
        '<sup id="fnref-footnote"><a class="footnote-ref" '
        'href="#fn-footnote">2</a></sup></p>\n'
        '<div class="footnote">\n'
        '<hr>\n<ol>\n<li id="fn-1">\n'
        '<p>Numbered footnote '
        '<a class="footnote-backref" href="#fnref-1" '
        'title="Jump back to footnote 1 in the text">↩</a></p>\n'
        '</li>\n<li id="fn-footnote">\n'
        '<p>Named footnote '
        '<a class="footnote-backref" href="#fnref-footnote"'
        ' title="Jump back to footnote 2 in the text">↩</a></p>\n'
        '</li>\n</ol>\n</div>'
    )
    wanted_meta = {
        'title': 'Article with markdown containing footnotes',
        'summary': ('<p>Summary with <strong>inline</strong> markup '
                    '<em>should</em> be supported.</p>'),
        'date': SafeDatetime(2012, 10, 31),
        'modified': SafeDatetime(2012, 11, 1),
        'multiline': [
            'Line Metadata should be handle properly.',
            'See syntax of Meta-Data extension of '
            'Python Markdown package:',
            'If a line is indented by 4 or more spaces,',
            'that line is assumed to be an additional line of the value',
            'for the previous keyword.',
            'A keyword may have as many lines as desired.',
        ],
    }

    self.assertEqual(html, wanted_html)
    self.assertDictHasSubset(meta, wanted_meta)
def test_article_with_footnote(self):
    # Reads the footnote fixture with default settings and checks the
    # rendered footnote markup plus every metadata entry the reader
    # returned (including the multi-line 'multiline' field).
    wanted_html = (
        '<p>This is some content'
        '<sup id="fnref:1"><a class="footnote-ref" href="#fn:1" '
        'rel="footnote">1</a></sup>'
        ' with some footnotes'
        '<sup id="fnref:footnote"><a class="footnote-ref" '
        'href="#fn:footnote" rel="footnote">2</a></sup></p>\n'
        '<div class="footnote">\n'
        '<hr />\n<ol>\n<li id="fn:1">\n'
        '<p>Numbered footnote '
        '<a class="footnote-backref" href="#fnref:1" rev="footnote" '
        'title="Jump back to footnote 1 in the text">↩</a></p>\n'
        '</li>\n<li id="fn:footnote">\n'
        '<p>Named footnote '
        '<a class="footnote-backref" href="#fnref:footnote" rev="footnote"'
        ' title="Jump back to footnote 2 in the text">↩</a></p>\n'
        '</li>\n</ol>\n</div>'
    )
    wanted_meta = {
        'title': 'Article with markdown containing footnotes',
        'summary': ('<p>Summary with <strong>inline</strong> markup '
                    '<em>should</em> be supported.</p>'),
        'date': SafeDatetime(2012, 10, 31),
        'modified': SafeDatetime(2012, 11, 1),
        'slug': 'article-with-markdown-containing-footnotes',
        'multiline': [
            'Line Metadata should be handle properly.',
            'See syntax of Meta-Data extension of Python Markdown package:',
            'If a line is indented by 4 or more spaces,',
            'that line is assumed to be an additional line of the value',
            'for the previous keyword.',
            'A keyword may have as many lines as desired.',
        ],
    }

    md_reader = readers.MarkdownReader(settings=get_settings())
    html, meta = md_reader.read(
        _path('article_with_markdown_and_footnote.md'))

    self.assertEqual(html, wanted_html)
    # Every key the reader produced must be present in the expectation and
    # carry the same value; the key doubles as the failure message.
    for name in meta:
        self.assertEqual(meta[name], wanted_meta[name], name)
def test_article_with_markdown_markup_extension(self):
    # With 'toc' listed in MARKDOWN_EXTENSIONS, the rendered output must
    # start with the table-of-contents block and the headings must carry
    # matching ids.
    md_reader = readers.MarkdownReader({})
    md_reader.settings.update({'MARKDOWN_EXTENSIONS': ['toc']})

    html, _ = md_reader.read(
        _filename('article_with_markdown_markup_extensions.md'))
    wanted_html = (
        '<div class="toc">\n'
        '<ul>\n'
        '<li><a href="#level1">Level1</a><ul>\n'
        '<li><a href="#level2">Level2</a></li>\n'
        '</ul>\n'
        '</li>\n'
        '</ul>\n'
        '</div>\n'
        '<h2 id="level1">Level1</h2>\n'
        '<h3 id="level2">Level2</h3>'
    )
    self.assertEqual(html, wanted_html)
def read(self, filename):
    """Knit an R Markdown file and parse the result with MarkdownReader.

    Settings consulted on ``self.settings``:

    * ``RMD_READER_KNITR_QUIET`` (default ``True``) -- passed to knitr as
      ``quiet``.
    * ``RMD_READER_KNITR_ENCODING`` (default ``'UTF-8'``) -- passed to knitr
      as ``encoding``.
    * ``RMD_READER_CLEANUP`` (default ``True``) -- remove the intermediate
      ``.aux`` file once it has been read.

    Returns the ``(content, metadata)`` pair produced by
    ``MarkdownReader.read`` for the knitted file.
    """
    quiet = self.settings.get('RMD_READER_KNITR_QUIET', True)
    encoding = self.settings.get('RMD_READER_KNITR_ENCODING', 'UTF-8')
    cleanup = self.settings.get('RMD_READER_CLEANUP', True)
    logger.debug("RMD_READER_KNITR_QUIET = %s", quiet)
    # Label corrected: this line reports the encoding, not the quiet flag.
    logger.debug("RMD_READER_KNITR_ENCODING = %s", encoding)
    logger.debug("RMD_READER_CLEANUP = %s", cleanup)

    # replace single backslashes with double backslashes
    # NOTE(review): presumably so Windows paths survive on the R side --
    # confirm.
    filename = filename.replace('\\', '\\\\')
    # parse Rmd file - generate md file
    md_filename = filename.replace('.Rmd', '.aux').replace('.rmd', '.aux')
    # `knitr` is a module-level name that is only read here, so no
    # `global` declaration is needed.
    knitr.knit(filename, md_filename, quiet=quiet, encoding=encoding)

    try:
        # read md file - create a MarkdownReader
        md_reader = readers.MarkdownReader(self.settings)
        content, metadata = md_reader.read(md_filename)
    finally:
        # remove md file, even when parsing failed, so no stale .aux file
        # is left next to the source
        if cleanup:
            os.remove(md_filename)
    return content, metadata
def read(self, filename):
    """Knit an R Markdown file and parse the result with MarkdownReader.

    Settings consulted on ``self.settings``:

    * ``RMD_READER_KNITR_QUIET`` (default ``True``) -- passed to knitr as
      ``quiet``.
    * ``RMD_READER_KNITR_ENCODING`` (default ``"UTF-8"``) -- passed to knitr
      as ``encoding``.
    * ``RMD_READER_CLEANUP`` (default ``True``) -- remove the intermediate
      ``.aux`` file once it has been read.
    * ``RMD_READER_RENAME_PLOT`` (default ``"chunklabel"``) -- plot naming
      mode; ``"chunklabel"`` and ``"directory"`` are acted upon, any other
      string leaves knitr's naming untouched.  A boolean is accepted for
      backward compatibility but logged as an error.

    Returns the ``(content, metadata)`` pair produced by
    ``MarkdownReader.read`` for the knitted file.
    """
    quiet = self.settings.get("RMD_READER_KNITR_QUIET", True)
    encoding = self.settings.get("RMD_READER_KNITR_ENCODING", "UTF-8")
    cleanup = self.settings.get("RMD_READER_CLEANUP", True)
    rename_plot = self.settings.get("RMD_READER_RENAME_PLOT", "chunklabel")

    # Map a legacy boolean value onto the string modes, complaining loudly.
    if isinstance(rename_plot, bool):
        logger.error(
            "RMD_READER_RENAME_PLOT takes a string value (either chunklabel or directory), please see the readme."
        )
        if rename_plot:
            rename_plot = "chunklabel"
            logger.error("Defaulting to chunklabel")
        else:
            rename_plot = "disabled"
            logger.error("Disabling plot renaming")

    logger.debug("RMD_READER_KNITR_QUIET = %s", quiet)
    logger.debug("RMD_READER_KNITR_ENCODING = %s", encoding)
    logger.debug("RMD_READER_CLEANUP = %s", cleanup)
    logger.debug("RMD_READER_RENAME_PLOT = %s", rename_plot)

    # replace single backslashes with double backslashes
    # NOTE(review): presumably so Windows paths survive on the R side --
    # confirm.
    filename = filename.replace("\\", "\\\\")
    # parse Rmd file - generate md file
    md_filename = filename.replace(".Rmd", ".aux").replace(".rmd", ".aux")

    if rename_plot in ("chunklabel", "directory"):
        if rename_plot == "chunklabel":
            # Label unnamed chunks after the source file's base name.
            chunk_label = os.path.splitext(os.path.basename(filename))[0]
            logger.debug("Chunk label: %s", chunk_label)
        else:
            # "directory": keep the generic label but prefix figure paths
            # with the source path relative to the content directory.
            chunk_label = "unnamed-chunk"
            content_path = self.settings.get(
                "PATH", "%s/content" % settings.DEFAULT_CONFIG.get("PATH"))
            src_name = os.path.splitext(
                os.path.relpath(filename, content_path))[0]
            idx = KNITR.opts_chunk.names.index("set")
            knitroptschunk = {
                "fig.path": "%s-" % os.path.join(FIG_PATH, src_name)
            }
            KNITR.opts_chunk[idx](
                **{str(k): v for k, v in knitroptschunk.items()})
            logger.debug(
                "Figures path: %s, chunk label: %s",
                knitroptschunk["fig.path"],
                chunk_label,
            )
        # "{{static}}" is escaped for str.format and reaches R as the
        # literal text "{static}".
        R_OBJECTS.r("""
opts_knit$set(unnamed.chunk.label="{unnamed_chunk_label}")
render_markdown()
hook_plot <- knit_hooks$get('plot')
knit_hooks$set(plot=function(x, options) hook_plot(paste0("{{static}}/", x), options))
        """.format(unnamed_chunk_label=chunk_label))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        KNITR.knit(filename, md_filename, quiet=quiet, encoding=encoding)

    try:
        # read md file - create a MarkdownReader
        md_reader = readers.MarkdownReader(self.settings)
        content, metadata = md_reader.read(md_filename)
    finally:
        # remove md file, even when parsing failed, so no stale .aux file
        # is left next to the source
        if cleanup:
            os.remove(md_filename)
    return content, metadata