def testCoreToken_Headers(dummyGUI):
    """Test the header and page parser of the Tokenizer class.
    """
    theProject = NWProject(dummyGUI)
    theProject.projLang = "en"
    theProject._loadProjectLocalisation()
    theToken = Tokenizer(theProject, dummyGUI)

    # Nothing
    theToken.theText = "Some text ...\n"
    assert theToken.doHeaders() is True
    theToken.isNone = True
    assert theToken.doHeaders() is False
    theToken.isNone = False
    assert theToken.doHeaders() is True
    theToken.isNote = True
    assert theToken.doHeaders() is False
    theToken.isNote = False

    ##
    # Novel
    ##

    theToken.isNovel = True

    # Titles
    # ======

    # H1: Title
    theToken.theText = "# Novel Title\n"
    theToken.setTitleFormat(r"T: %title%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD1, 1, "T: Novel Title", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Chapters
    # ========

    # H2: Chapter
    theToken.theText = "## Chapter One\n"
    theToken.setChapterFormat(r"C: %title%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD2, 1, "C: Chapter One", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Unnumbered Chapter
    theToken.theText = "## Chapter One\n"
    theToken.setUnNumberedFormat(r"U: %title%")
    theToken.isUnNum = True
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD2, 1, "U: Chapter One", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Unnumbered Chapter with Star
    theToken.theText = "## *Prologue\n"
    theToken.setUnNumberedFormat(r"U: %title%")
    theToken.isUnNum = False
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD2, 1, "U: Prologue", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Chapter Word Number
    theToken.theText = "## Chapter\n"
    theToken.setChapterFormat(r"Chapter %chw%")
    theToken.numChapter = 0
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD2, 1, "Chapter One", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Chapter Roman Number Upper Case
    theToken.theText = "## Chapter\n"
    theToken.setChapterFormat(r"Chapter %chI%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD2, 1, "Chapter II", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Chapter Roman Number Lower Case
    theToken.theText = "## Chapter\n"
    theToken.setChapterFormat(r"Chapter %chi%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD2, 1, "Chapter iii", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Scenes
    # ======

    # H3: Scene w/Title
    theToken.theText = "### Scene One\n"
    theToken.setSceneFormat(r"S: %title%", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD3, 1, "S: Scene One", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene Hidden wo/Format
    theToken.theText = "### Scene One\n"
    theToken.setSceneFormat(r"", True)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]
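
    # The chapter and scene counters appear to increment each time a
    # heading is tokenized, which is why %chI% produced "Chapter II"
    # above. The next four cases check the first scene logic: with an
    # empty or separator scene format, the first scene of a document
    # produces no output, while later scenes produce a skip token or a
    # centred separator instead of a heading.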

    # H3: Scene wo/Format, first
    theToken.theText = "### Scene One\n"
    theToken.setSceneFormat(r"", False)
    theToken.firstScene = True
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene wo/Format, not first
    theToken.theText = "### Scene One\n"
    theToken.setSceneFormat(r"", False)
    theToken.firstScene = False
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_SKIP, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene Separator, first
    theToken.theText = "### Scene One\n"
    theToken.setSceneFormat(r"* * *", False)
    theToken.firstScene = True
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene Separator, not first
    theToken.theText = "### Scene One\n"
    theToken.setSceneFormat(r"* * *", False)
    theToken.firstScene = False
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_SEP, 1, "* * *", None, Tokenizer.A_CENTRE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene w/Absolute Number
    theToken.theText = "### A Scene\n"
    theToken.setSceneFormat(r"Scene %sca%", False)
    theToken.numAbsScene = 0
    theToken.numChScene = 0
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD3, 1, "Scene 1", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene w/Chapter Number
    theToken.theText = "### A Scene\n"
    theToken.setSceneFormat(r"Scene %ch%.%sc%", False)
    theToken.numAbsScene = 0
    theToken.numChScene = 1
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD3, 1, "Scene 3.2", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Sections
    # ========

    # H4: Section Hidden wo/Format
    theToken.theText = "#### A Section\n"
    theToken.setSectionFormat(r"", True)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H4: Section Visible wo/Format
    theToken.theText = "#### A Section\n"
    theToken.setSectionFormat(r"", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_SKIP, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H4: Section w/Format
    theToken.theText = "#### A Section\n"
    theToken.setSectionFormat(r"X: %title%", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_HEAD4, 1, "X: A Section", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H4: Section Separator
    theToken.theText = "#### A Section\n"
    theToken.setSectionFormat(r"* * *", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_SEP, 1, "* * *", None, Tokenizer.A_CENTRE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Check the first scene detector
    assert theToken.firstScene is False
    theToken.firstScene = True
    assert theToken.firstScene is True
    theToken.theText = "Some text ...\n"
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken.firstScene is False

    ##
    # Title or Partition
    ##

    theToken.isNovel = False

    # H1: Title
    theToken.theText = "# Novel Title\n"
    theToken.tokenizeText()
    theToken.isTitle = True
    theToken.isPart = False
    theToken.doHeaders()
    assert theToken.theTokens == [
        (Tokenizer.T_TITLE, 1, "Novel Title", None, Tokenizer.A_PBB_AUT | Tokenizer.A_CENTRE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_PBA | Tokenizer.A_CENTRE),
    ]
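
    # Note: as the next assertion shows, the title format set with
    # setTitleFormat() does not seem to apply to partitions; the header
    # text is kept as-is, with a page break before and centred
    # alignment.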
(Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_PBA | Tokenizer.A_CENTRE), ] # H1: Partition theToken.theText = "# Partition Title\n" theToken.setTitleFormat(r"T: %title%") theToken.tokenizeText() theToken.isTitle = False theToken.isPart = True theToken.doHeaders() assert theToken.theTokens == [ (Tokenizer.T_HEAD1, 1, "Partition Title", None, Tokenizer.A_PBB | Tokenizer.A_CENTRE), (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_PBA | Tokenizer.A_CENTRE), ] ## # Page ## theToken.isNovel = False theToken.isTitle = False theToken.isPart = False theToken.isPage = True # Some Page Text theToken.theText = "Page text\n\nMore text\n" theToken.tokenizeText() theToken.doHeaders() assert theToken.theTokens == [ (Tokenizer.T_TEXT, 1, "Page text", [], Tokenizer.A_PBB | Tokenizer.A_LEFT), (Tokenizer.T_EMPTY, 2, "", None, Tokenizer.A_LEFT), (Tokenizer.T_TEXT, 3, "More text", [], Tokenizer.A_LEFT), (Tokenizer.T_EMPTY, 3, "", None, Tokenizer.A_LEFT), ]


def testCoreToken_TextOps(monkeypatch, nwMinimal, dummyGUI):
    """Test handling files and text in the Tokenizer class.
    """
    theProject = NWProject(dummyGUI)
    theProject.projTree.setSeed(42)
    theProject.projLang = "en"
    theProject._loadProjectLocalisation()
    theToken = Tokenizer(theProject, dummyGUI)
    theToken.setKeepMarkdown(True)

    assert theProject.openProject(nwMinimal)
    sHandle = "8c659a11cd429"

    # Set some content to work with
    docText = (
        "### Scene Six\n\n"
        "This is text with _italic text_, some **bold text**, some ~~deleted text~~, "
        "and some **_mixed text_** and **some _nested_ text**.\n\n"
        "#### Replace\n\n"
        "Also, replace <A> and <B>.\n\n"
    )
    docTextR = docText.replace("<A>", "this").replace("<B>", "that")

    nDoc = NWDoc(theProject, dummyGUI)
    nDoc.openDocument(sHandle)
    nDoc.saveDocument(docText)
    nDoc.clearDocument()

    theProject.setAutoReplace({"A": "this", "B": "that"})
    assert theProject.saveProject()

    # Root heading
    assert theToken.addRootHeading("dummy") is False
    assert theToken.addRootHeading(sHandle) is False
    assert theToken.addRootHeading("7695ce551d265") is True
    assert theToken.theMarkdown[-1] == "# Notes: Plot\n\n"

    # Set text
    assert theToken.setText("dummy") is False
    assert theToken.setText(sHandle) is True
    assert theToken.theText == docText

    with monkeypatch.context() as mp:
        mp.setattr("nw.constants.nwConst.MAX_DOCSIZE", 100)
        assert theToken.setText(sHandle, docText) is True
        assert theToken.theText == (
            "# ERROR\n\n"
            "Document 'New Scene' is too big (0.00 MB). Skipping.\n\n"
        )

    assert theToken.setText(sHandle, docText) is True
    assert theToken.theText == docText

    assert theToken.isNone is False
    assert theToken.isTitle is False
    assert theToken.isBook is False
    assert theToken.isPage is False
    assert theToken.isPart is False
    assert theToken.isUnNum is False
    assert theToken.isChap is False
    assert theToken.isScene is True
    assert theToken.isNote is False
    assert theToken.isNovel is True

    # Pre Processing
    theToken.doPreProcessing()
    assert theToken.theText == docTextR

    # Post Processing
    theToken.theResult = r"This is text with escapes: \** \~~ \__"
    theToken.doPostProcessing()
    assert theToken.theResult == "This is text with escapes: ** ~~ __"

    # Save File
    savePath = os.path.join(nwMinimal, "dump.nwd")
    theToken.saveRawMarkdown(savePath)
    assert readFile(savePath) == "# Notes: Plot\n\n"
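
    # Since tokenizeText() is never called in this test, theMarkdown
    # appears to hold only the root heading added via addRootHeading()
    # above, which is why the dumped file contains just that heading.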