def write_mediawiki(pages, path, template):
    """Write one markup file for each page recovered from the XML.

    Every page body is passed through ``to_mw()``, merged into the template
    with ``str.format`` and written to ``<title>_<page_id>.txt`` below *path*.

    Args:
        pages: Mapping of page id to a dict of page data. The keys read here
            are ``body``, ``title``, ``version``, ``contentStatus``,
            ``creationDate``, ``lastModificationDate`` and ``position``.
        path: Output directory; it is created if it does not exist yet.
        template: Path of a ``str.format`` template file. It is formatted
            with the fields ``body``, ``title``, ``version``, ``current``,
            ``created``, ``latest_mod`` and ``position``.
    """
    from os import altsep, sep

    makedirs(path, exist_ok=True)
    # Be explicit about the encoding: the locale default is not guaranteed to
    # be able to represent every character found in wiki text.
    with open(template, encoding='utf-8') as tmpl_file:
        tmpl = tmpl_file.read()
    for page_id, page_data in pages.items():
        # `or ''` also covers keys that are present but set to None.
        body = page_data.get('body') or ''
        body_text = "\n".join(to_mw(body))
        title = page_data.get('title') or ''
        filename = title + '_' + str(page_id) + '.txt'
        # A page title may contain path separators; neutralise them so the
        # file always lands directly inside `path`.
        safe_name = filename.replace(sep, '_')
        if altsep:
            safe_name = safe_name.replace(altsep, '_')
        filepath = join(path, safe_name)
        data = {
            'body': body_text,
            'title': filename,
            'version': page_data.get('version'),
            'current': page_data.get('contentStatus'),
            'created': page_data.get('creationDate'),
            'latest_mod': page_data.get('lastModificationDate'),
            'position': page_data.get('position'),
        }
        filecontent = tmpl.format(**data)
        with open(filepath, 'w', encoding='utf-8') as outfile:
            outfile.write(filecontent)
def test_unordered_list_b(self):
    """Confluence knows two markers for unordered list items; this test
    covers the `-` variant, which is expected to come out as `*`.
    """
    confluence = 'Some text.\n- first item.\n- second item.\n'
    mediawiki = 'Some text.\n* first item.\n* second item.\n'
    converted = '\n'.join(to_mw(confluence))
    self.assertEqual(converted, mediawiki)
def test_no_inline_unordered_lists(self):
    """A `-` only counts as list markup at the very start of a line."""
    # The `*` marker is deliberately left out here: away from the start of
    # a line it denotes emphasis rather than a list item.
    source = 'Some text with - a pseudo-list\nSome more -- fake lists.\n'
    # The conversion is expected to leave this text untouched.
    expected = source
    converted = '\n'.join(to_mw(source))
    self.assertEqual(converted, expected)
def test_no_inline_headings(self):
    """Heading markup is only honoured at the beginning of a line.

    A `h1. ` that shows up in the middle of a line must pass through
    unchanged, while a real heading in the same text is still converted.
    """
    source = (
        'h1. A Heading\n'
        '\n'
        'A line with h1. in it.\n'
        'A line with h3. in it.\n'
    )
    wanted = (
        '= A Heading =\n'
        '\n'
        'A line with h1. in it.\n'
        'A line with h3. in it.\n'
    )
    self.assertEqual('\n'.join(to_mw(source)), wanted)
def test_internal_link(self):
    """An internal link without special label text.

    The link is checked in the middle, at the end and at the start of a
    line. Each case runs in its own sub-test so that one failing case
    neither hides the others nor leaves the failing input unnamed.
    """
    lines = [
        ('A line with a [PageLink] in it.\n',
         'A line with a [[PageLink]] in it.\n'),
        ('This line ends with [PageLink].\n',
         'This line ends with [[PageLink]].\n'),
        ('[PageLink] at the start.\n',
         '[[PageLink]] at the start.\n'),
    ]
    for confluence, mediawiki in lines:
        with self.subTest(confluence=confluence):
            result = '\n'.join(to_mw(confluence))
            self.assertEqual(result, mediawiki)
def test_internal_link_with_label(self):
    """An internal link with custom label text.

    The expected output swaps the order of label and target
    (`[Label|Target]` becomes `[[Target|Label]]`). Each case runs in its
    own sub-test so that a failure names the offending input and the
    remaining cases are still checked.
    """
    lines = [
        ('A line with a [My Label|PageLink#anchor] in it.\n',
         'A line with a [[PageLink#anchor|My Label]] in it.\n'),
        ('This line ends with [My Label|PageLink#anchor].\n',
         'This line ends with [[PageLink#anchor|My Label]].\n'),
        ('[My Label|PageLink#anchor] at the start.\n',
         '[[PageLink#anchor|My Label]] at the start.\n'),
    ]
    for confluence, mediawiki in lines:
        with self.subTest(confluence=confluence):
            result = '\n'.join(to_mw(confluence))
            self.assertEqual(result, mediawiki)
def test_strong_emphasis(self):
    """This type of emphasis is normally rendered as bold text.

    `*text*` is expected to become `'''text'''`. Each known item runs in
    its own sub-test so that a failure names the offending input and the
    remaining items are still checked.
    """
    known_items = [
        ("text with *inline emphasis* and more text",
         "text with '''inline emphasis''' and more text"),
        ("*emphasis at the beginning* and more text",
         "'''emphasis at the beginning''' and more text"),
        ("emphasis with following *non-white-space*, and text",
         "emphasis with following '''non-white-space''', and text"),
    ]
    for page_body, expected in known_items:
        with self.subTest(page_body=page_body):
            result = '\n'.join(to_mw(page_body))
            self.assertEqual(result, expected)
def test_external_link(self):
    """An external link.

    In this specific case, `to_mw()` should be a no-op. Each case runs in
    its own sub-test so that a failure names the offending input and the
    remaining cases are still checked.
    """
    lines = [
        ('A line with a [https://example.com/page#anchor] in it.\n',
         'A line with a [https://example.com/page#anchor] in it.\n'),
        ('This line ends with [https://example.com/page#anchor].\n',
         'This line ends with [https://example.com/page#anchor].\n'),
        ('[https://example.com/page#anchor] at the start.\n',
         '[https://example.com/page#anchor] at the start.\n'),
    ]
    for confluence, mediawiki in lines:
        with self.subTest(confluence=confluence):
            result = '\n'.join(to_mw(confluence))
            self.assertEqual(result, mediawiki)
def test_external_link_with_label(self):
    """An external link with custom label text.

    Contrary to the version without label, the syntax of mediawiki and
    confluence differs slightly: `[Label|URL]` is expected to become
    `[URL Label]`. Each case runs in its own sub-test so that a failure
    names the offending input and the remaining cases are still checked.
    """
    lines = [
        ('A line with a [My Label|https://example.com/page#anchor] in it.\n',
         'A line with a [https://example.com/page#anchor My Label] in it.\n'
         ),
        ('This line ends with [My Label|https://example.com/page#anchor].\n',
         'This line ends with [https://example.com/page#anchor My Label].\n'
         ),
        ('[My Label|https://example.com/page#anchor] at the start.\n',
         '[https://example.com/page#anchor My Label] at the start.\n'),
    ]
    for confluence, mediawiki in lines:
        with self.subTest(confluence=confluence):
            result = '\n'.join(to_mw(confluence))
            self.assertEqual(result, mediawiki)
def test_nested_ordered_lists(self):
    """Nested ordered lists (no mixing with other list types) are expected
    to pass through the conversion unchanged.
    """
    source = (
        'Some text.\n'
        '# first, first\n'
        '## second, first\n'
        '## second, second\n'
        '# first, second\n'
    )
    wanted = (
        'Some text.\n'
        '# first, first\n'
        '## second, first\n'
        '## second, second\n'
        '# first, second\n'
    )
    self.assertEqual('\n'.join(to_mw(source)), wanted)
def test_ordered_list(self):
    """Check the conversion of a flat ordered list; the expected output
    equals the input.
    """
    source = 'Some text.\n# first item\n# second item\n'
    wanted = 'Some text.\n# first item\n# second item\n'
    converted = '\n'.join(to_mw(source))
    self.assertEqual(converted, wanted)
def test_nested_unordered_lists_b(self):
    """Nested unordered lists written with `-` (no mixing with other list
    types) are expected to come out with `*` markers of the same depth.
    """
    source = (
        'Some text.\n'
        '- first level, first item\n'
        '-- second level, first item\n'
        '-- second level, second item\n'
        '- first level, second item\n'
    )
    wanted = (
        'Some text.\n'
        '* first level, first item\n'
        '** second level, first item\n'
        '** second level, second item\n'
        '* first level, second item\n'
    )
    self.assertEqual('\n'.join(to_mw(source)), wanted)
def test_various_headings(self):
    """Heading levels h1 to h4 are expected to map onto one to four `=`
    signs on either side of the heading text.
    """
    source = 'h1. H1\nh2. H2\nh3. H3\nh4. H4\n'
    wanted = '= H1 =\n== H2 ==\n=== H3 ===\n==== H4 ====\n'
    converted = '\n'.join(to_mw(source))
    self.assertEqual(converted, wanted)
def test_empty_page(self):
    """Joining the output for an empty page body must give an empty
    string.
    """
    converted = '\n'.join(to_mw(''))
    self.assertEqual(converted, '')
def test_no_inline_ordered_lists(self):
    """A `#` only counts as ordered-list markup at the start of a line;
    occurrences further into a line are expected to stay as they are.
    """
    source = 'Some text with # a pseudo list\nSome more ## fake lists\n'
    wanted = 'Some text with # a pseudo list\nSome more ## fake lists\n'
    converted = '\n'.join(to_mw(source))
    self.assertEqual(converted, wanted)
def test_mixed_lists(self):
    """An unordered list nested inside an ordered one is expected to pass
    through the conversion unchanged.
    """
    source = '# first\n#* mixed\n#* list\n# for us\n'
    wanted = '# first\n#* mixed\n#* list\n# for us\n'
    converted = '\n'.join(to_mw(source))
    self.assertEqual(converted, wanted)