def testIndexExtractData(nwMinimal, nwDummy):
    """Check the index data extraction functions.

    The test opens the minimal project, adds one novel file and one
    character file, scans text into the index for both, and then checks
    the values returned by getNovelStructure, getCounts, getReferences,
    getBackReferenceList and getTagSource.
    """
    theProject = NWProject(nwDummy)
    theProject.projTree.setSeed(42)
    assert theProject.openProject(nwMinimal)

    theIndex = NWIndex(theProject, nwDummy)
    nHandle = theProject.newFile("Hello", nwItemClass.NOVEL, "a508bb932959c")
    cHandle = theProject.newFile("Jane", nwItemClass.CHARACTER, "afb3043c7b2b3")

    # A single titled section: header, two references, a comment and
    # two paragraphs of text.
    oneSection = (
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n"
    )
    # The same section twice, separated by a blank line.
    twoSections = oneSection + "\n" + oneSection

    assert theIndex.scanText(cHandle, (
        "# Jane Smith\n"
        "@tag: Jane\n"
    ))
    assert theIndex.scanText(nHandle, oneSection)

    # The novel structure should hold a pointer to the novel file header
    assert str(theIndex.getNovelStructure()) == "['%s:T000001']" % nHandle

    # Counts are (characters, words, paragraphs); characters and words
    # cover text and title only, paragraphs cover text only.
    assert tuple(theIndex.getCounts(nHandle)) == (62, 12, 2)

    ##
    #  getReferences
    ##

    # Looking up an invalid handle gives empty reference lists
    noRefs = theIndex.getReferences("Not a handle")
    assert noRefs["@pov"] == []
    assert noRefs["@char"] == []

    # The novel file should refer to Jane as both @pov and @char
    novelRefs = theIndex.getReferences(nHandle)
    assert str(novelRefs["@pov"]) == "['Jane']"
    assert str(novelRefs["@char"]) == "['Jane']"

    ##
    #  getBackReferenceList
    ##

    # A None handle should return an empty dict
    assert theIndex.getBackReferenceList(None) == {}

    # The character file should record the reference from the novel file
    backRefs = theIndex.getBackReferenceList(cHandle)
    assert str(backRefs) == "{'%s': 'T000001'}" % nHandle

    ##
    #  getTagSource
    ##

    assert theIndex.getTagSource("Jane") == (cHandle, 2, "T000001")
    assert theIndex.getTagSource("John") == (None, 0, "T000000")

    ##
    #  getCounts for whole text and sections
    ##

    # The same checks are made first for the novel file and then for the
    # note (character) file.
    for aHandle in (nHandle, cHandle):
        assert theIndex.scanText(aHandle, twoSections)

        # Whole document
        assert tuple(theIndex.getCounts(aHandle)) == (124, 24, 4)

        # First part
        assert tuple(theIndex.getCounts(aHandle, "T000001")) == (62, 12, 2)

        # Second part
        assert tuple(theIndex.getCounts(aHandle, "T000011")) == (62, 12, 2)

    assert theProject.closeProject()
def testCoreIndex_ExtractData(nwMinimal, dummyGUI):
    """Check the index data extraction functions.

    The test opens the minimal project, adds novel and character files,
    scans text into the index, and then checks novelStructure,
    getCounts, getReferences, getBackReferenceList, getTagSource, the
    novel statistics functions, getTableOfContents, getHandleWordCounts
    and getHandleHeaders. The project is closed only after every index
    query has been made.
    """
    theProject = NWProject(dummyGUI)
    theProject.projTree.setSeed(42)
    assert theProject.openProject(nwMinimal)

    theIndex = NWIndex(theProject, dummyGUI)
    nHandle = theProject.newFile("Hello", nwItemClass.NOVEL, "a508bb932959c")
    cHandle = theProject.newFile("Jane", nwItemClass.CHARACTER, "afb3043c7b2b3")

    # Nothing has been scanned yet, so no novel data is available
    assert theIndex.getNovelData("", "") is None
    assert theIndex.getNovelData("a508bb932959c", "") is None

    assert theIndex.scanText(cHandle, (
        "# Jane Smith\n"
        "@tag: Jane\n"
    ))
    assert theIndex.scanText(nHandle, (
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n"
    ))

    # The novel structure should contain the pointer to the novel file header
    theKeys = [aKey for aKey, _, _, _ in theIndex.novelStructure()]
    assert theKeys == ["%s:T000001" % nHandle]

    # Check that excluded files can be skipped
    theProject.projTree[nHandle].setExported(False)

    theKeys = [aKey for aKey, _, _, _ in theIndex.novelStructure(skipExcluded=False)]
    assert theKeys == ["%s:T000001" % nHandle]

    theKeys = [aKey for aKey, _, _, _ in theIndex.novelStructure(skipExcluded=True)]
    assert theKeys == []

    # Calling without the argument gives the same result as skipExcluded=True
    theKeys = [aKey for aKey, _, _, _ in theIndex.novelStructure()]
    assert theKeys == []

    # The novel file should have the correct counts
    cC, wC, pC = theIndex.getCounts(nHandle)
    assert cC == 62  # Characters in text and title only
    assert wC == 12  # Words in text and title only
    assert pC == 2   # Paragraphs in text only

    # getReferences
    # =============

    # Look up an invalid handle
    theRefs = theIndex.getReferences("Not a handle")
    assert theRefs["@pov"] == []
    assert theRefs["@char"] == []

    # The novel file should now refer to Jane as @pov and @char
    theRefs = theIndex.getReferences(nHandle)
    assert theRefs["@pov"] == ["Jane"]
    assert theRefs["@char"] == ["Jane"]

    # getBackReferenceList
    # ====================

    # None handle should return an empty dict
    assert theIndex.getBackReferenceList(None) == {}

    # The character file should have a record of the reference from the novel file
    theRefs = theIndex.getBackReferenceList(cHandle)
    assert theRefs == {nHandle: "T000001"}

    # getTagSource
    # ============

    assert theIndex.getTagSource("Jane") == (cHandle, 2, "T000001")
    assert theIndex.getTagSource("John") == (None, 0, "T000000")

    # getCounts
    # =========
    # For whole text and sections

    # Two sections with identical headers; the second one has one extra
    # sentence so that its counts differ from the first.
    twoSections = (
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n\n"
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really. She's still awesome though.\n"
    )

    # Get section counts, first for the novel file and then for the note file
    for aHandle in (nHandle, cHandle):
        assert theIndex.scanText(aHandle, twoSections)

        # Whole document
        cC, wC, pC = theIndex.getCounts(aHandle)
        assert cC == 152
        assert wC == 28
        assert pC == 4

        # First part
        cC, wC, pC = theIndex.getCounts(aHandle, "T000001")
        assert cC == 62
        assert wC == 12
        assert pC == 2

        # Second part
        cC, wC, pC = theIndex.getCounts(aHandle, "T000011")
        assert cC == 90
        assert wC == 16
        assert pC == 2

    # Novel Stats
    # ===========

    hHandle = theProject.newFile("Chapter", nwItemClass.NOVEL, "a508bb932959c")
    sHandle = theProject.newFile("Scene One", nwItemClass.NOVEL, "a508bb932959c")
    tHandle = theProject.newFile("Scene Two", nwItemClass.NOVEL, "a508bb932959c")

    # NOTE(review): the three statements below are bare comparisons whose
    # results are discarded; they neither set nor verify the layouts.
    # Presumably an assignment or an assert was intended. Confirm against
    # the item class before changing them, as either change may alter
    # the outcome of this test.
    theProject.projTree[hHandle].itemLayout == nwItemLayout.CHAPTER
    theProject.projTree[sHandle].itemLayout == nwItemLayout.SCENE
    theProject.projTree[tHandle].itemLayout == nwItemLayout.SCENE

    assert theIndex.scanText(hHandle, "## Chapter One\n\n")
    assert theIndex.scanText(sHandle, "### Scene One\n\n")
    assert theIndex.scanText(tHandle, "### Scene Two\n\n")

    # nHandle was marked as not exported above, so it is only listed
    # when the argument is False
    assert theIndex._listNovelHandles(False) == [
        nHandle, hHandle, sHandle, tHandle
    ]
    assert theIndex._listNovelHandles(True) == [hHandle, sHandle, tHandle]

    # Add a fake handle to the tree and check that it's ignored
    bHandle = "0000000000000"
    theProject.projTree._treeOrder.append(bHandle)
    assert theIndex._listNovelHandles(False) == [
        nHandle, hHandle, sHandle, tHandle
    ]
    theProject.projTree._treeOrder.remove(bHandle)

    # Extract stats
    assert theIndex.getNovelWordCount(False) == 34
    assert theIndex.getNovelWordCount(True) == 6
    assert theIndex.getNovelTitleCounts(False) == [0, 2, 1, 2, 0]
    assert theIndex.getNovelTitleCounts(True) == [0, 0, 1, 2, 0]

    # Table of Contents
    assert theIndex.getTableOfContents(0, True) == []
    assert theIndex.getTableOfContents(1, True) == []
    assert theIndex.getTableOfContents(2, True) == [
        ("%s:T000001" % hHandle, 2, "Chapter One", 6),
    ]
    assert theIndex.getTableOfContents(3, True) == [
        ("%s:T000001" % hHandle, 2, "Chapter One", 2),
        ("%s:T000001" % sHandle, 3, "Scene One", 2),
        ("%s:T000001" % tHandle, 3, "Scene Two", 2),
    ]

    assert theIndex.getTableOfContents(0, False) == []
    assert theIndex.getTableOfContents(1, False) == [
        ("%s:T000001" % nHandle, 1, "Hello World!", 12),
        ("%s:T000011" % nHandle, 1, "Hello World!", 22),
    ]

    # Header Word Counts
    assert theIndex.getHandleWordCounts(bHandle) == []
    assert theIndex.getHandleWordCounts(hHandle) == [("%s:T000001" % hHandle, 2)]
    assert theIndex.getHandleWordCounts(sHandle) == [("%s:T000001" % sHandle, 2)]
    assert theIndex.getHandleWordCounts(tHandle) == [("%s:T000001" % tHandle, 2)]
    assert theIndex.getHandleWordCounts(nHandle) == [
        ("%s:T000001" % nHandle, 12),
        ("%s:T000011" % nHandle, 16)
    ]

    # Header Record
    assert theIndex.getHandleHeaders(bHandle) == []
    assert theIndex.getHandleHeaders(hHandle) == [("T000001", "H2", "Chapter One")]
    assert theIndex.getHandleHeaders(sHandle) == [("T000001", "H3", "Scene One")]
    assert theIndex.getHandleHeaders(tHandle) == [("T000001", "H3", "Scene Two")]
    assert theIndex.getHandleHeaders(nHandle) == [
        ("T000001", "H1", "Hello World!"),
        ("T000011", "H1", "Hello World!")
    ]

    # Close the project last, after all index queries have been made
    assert theProject.closeProject()