Exemplo n.º 1
0
def testIndexExtractData(nwMinimal, nwDummy):
    """Check the index data extraction functions.
    """
    theProject = NWProject(nwDummy)
    theProject.projTree.setSeed(42)
    assert theProject.openProject(nwMinimal)

    theIndex = NWIndex(theProject, nwDummy)
    nHandle = theProject.newFile("Hello", nwItemClass.NOVEL,     "a508bb932959c")
    cHandle = theProject.newFile("Jane",  nwItemClass.CHARACTER, "afb3043c7b2b3")

    assert theIndex.scanText(cHandle, (
        "# Jane Smith\n"
        "@tag: Jane\n"
    ))
    assert theIndex.scanText(nHandle, (
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n"
    ))

    # The novel structure should contain the pointer to the novel file header
    assert str(theIndex.getNovelStructure()) == "['%s:T000001']" % nHandle

    # The novel file should have the correct counts
    cC, wC, pC = theIndex.getCounts(nHandle)
    assert cC == 62 # Characters in text and title only
    assert wC == 12 # Words in text and title only
    assert pC == 2  # Paragraphs in text only

    ##
    #  getReferences
    ##

    # Look up an ivalid handle
    theRefs = theIndex.getReferences("Not a handle")
    assert theRefs["@pov"] == []
    assert theRefs["@char"] == []

    # The novel file should now refer to Jane as @pov and @char
    theRefs = theIndex.getReferences(nHandle)
    assert str(theRefs["@pov"]) == "['Jane']"
    assert str(theRefs["@char"]) == "['Jane']"

    ##
    #  getBackReferenceList
    ##

    # None handle should return an empty dict
    assert theIndex.getBackReferenceList(None) == {}

    # The character file should have a record of the reference from the novel file
    theRefs = theIndex.getBackReferenceList(cHandle)
    assert str(theRefs) == "{'%s': 'T000001'}" % nHandle

    ##
    #  getTagSource
    ##

    assert theIndex.getTagSource("Jane") == (cHandle, 2, "T000001")
    assert theIndex.getTagSource("John") == (None, 0, "T000000")

    ##
    #  getCounts for whole text and sections
    ##

    # Get section counts for a novel file
    assert theIndex.scanText(nHandle, (
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n\n"
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n"
    ))
    # Whole document
    cC, wC, pC = theIndex.getCounts(nHandle)
    assert cC == 124
    assert wC == 24
    assert pC == 4

    # First part
    cC, wC, pC = theIndex.getCounts(nHandle, "T000001")
    assert cC == 62
    assert wC == 12
    assert pC == 2

    # First part
    cC, wC, pC = theIndex.getCounts(nHandle, "T000011")
    assert cC == 62
    assert wC == 12
    assert pC == 2

    # Get section counts for a note file
    assert theIndex.scanText(cHandle, (
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n\n"
        "# Hello World!\n"
        "@pov: Jane\n"
        "@char: Jane\n\n"
        "% this is a comment\n\n"
        "This is a story about Jane Smith.\n\n"
        "Well, not really.\n"
    ))
    # Whole document
    cC, wC, pC = theIndex.getCounts(cHandle)
    assert cC == 124
    assert wC == 24
    assert pC == 4

    # First part
    cC, wC, pC = theIndex.getCounts(cHandle, "T000001")
    assert cC == 62
    assert wC == 12
    assert pC == 2

    # First part
    cC, wC, pC = theIndex.getCounts(cHandle, "T000011")
    assert cC == 62
    assert wC == 12
    assert pC == 2

    assert theProject.closeProject()
Exemplo n.º 2
0
def testCoreIndex_ExtractData(nwMinimal, dummyGUI):
    """Check the index data extraction functions.
    """
    theProject = NWProject(dummyGUI)
    theProject.projTree.setSeed(42)
    assert theProject.openProject(nwMinimal)

    theIndex = NWIndex(theProject, dummyGUI)
    nHandle = theProject.newFile("Hello", nwItemClass.NOVEL, "a508bb932959c")
    cHandle = theProject.newFile("Jane", nwItemClass.CHARACTER,
                                 "afb3043c7b2b3")

    assert theIndex.getNovelData("", "") is None
    assert theIndex.getNovelData("a508bb932959c", "") is None

    assert theIndex.scanText(cHandle, ("# Jane Smith\n" "@tag: Jane\n"))
    assert theIndex.scanText(nHandle, ("# Hello World!\n"
                                       "@pov: Jane\n"
                                       "@char: Jane\n\n"
                                       "% this is a comment\n\n"
                                       "This is a story about Jane Smith.\n\n"
                                       "Well, not really.\n"))

    # The novel structure should contain the pointer to the novel file header
    theKeys = []
    for aKey, _, _, _ in theIndex.novelStructure():
        theKeys.append(aKey)

    assert theKeys == ["%s:T000001" % nHandle]

    # Check that excluded files can be skipped
    theProject.projTree[nHandle].setExported(False)

    theKeys = []
    for aKey, _, _, _ in theIndex.novelStructure(skipExcluded=False):
        theKeys.append(aKey)

    assert theKeys == ["%s:T000001" % nHandle]

    theKeys = []
    for aKey, _, _, _ in theIndex.novelStructure(skipExcluded=True):
        theKeys.append(aKey)

    assert theKeys == []

    theKeys = []
    for aKey, _, _, _ in theIndex.novelStructure():
        theKeys.append(aKey)

    assert theKeys == []

    # The novel file should have the correct counts
    cC, wC, pC = theIndex.getCounts(nHandle)
    assert cC == 62  # Characters in text and title only
    assert wC == 12  # Words in text and title only
    assert pC == 2  # Paragraphs in text only

    # getReferences
    # =============

    # Look up an ivalid handle
    theRefs = theIndex.getReferences("Not a handle")
    assert theRefs["@pov"] == []
    assert theRefs["@char"] == []

    # The novel file should now refer to Jane as @pov and @char
    theRefs = theIndex.getReferences(nHandle)
    assert theRefs["@pov"] == ["Jane"]
    assert theRefs["@char"] == ["Jane"]

    # getBackReferenceList
    # ====================

    # None handle should return an empty dict
    assert theIndex.getBackReferenceList(None) == {}

    # The character file should have a record of the reference from the novel file
    theRefs = theIndex.getBackReferenceList(cHandle)
    assert theRefs == {nHandle: "T000001"}

    # getTagSource
    # ============

    assert theIndex.getTagSource("Jane") == (cHandle, 2, "T000001")
    assert theIndex.getTagSource("John") == (None, 0, "T000000")

    # getCounts
    # =========
    # For whole text and sections

    # Get section counts for a novel file
    assert theIndex.scanText(
        nHandle, ("# Hello World!\n"
                  "@pov: Jane\n"
                  "@char: Jane\n\n"
                  "% this is a comment\n\n"
                  "This is a story about Jane Smith.\n\n"
                  "Well, not really.\n\n"
                  "# Hello World!\n"
                  "@pov: Jane\n"
                  "@char: Jane\n\n"
                  "% this is a comment\n\n"
                  "This is a story about Jane Smith.\n\n"
                  "Well, not really. She's still awesome though.\n"))
    # Whole document
    cC, wC, pC = theIndex.getCounts(nHandle)
    assert cC == 152
    assert wC == 28
    assert pC == 4

    # First part
    cC, wC, pC = theIndex.getCounts(nHandle, "T000001")
    assert cC == 62
    assert wC == 12
    assert pC == 2

    # Second part
    cC, wC, pC = theIndex.getCounts(nHandle, "T000011")
    assert cC == 90
    assert wC == 16
    assert pC == 2

    # Get section counts for a note file
    assert theIndex.scanText(
        cHandle, ("# Hello World!\n"
                  "@pov: Jane\n"
                  "@char: Jane\n\n"
                  "% this is a comment\n\n"
                  "This is a story about Jane Smith.\n\n"
                  "Well, not really.\n\n"
                  "# Hello World!\n"
                  "@pov: Jane\n"
                  "@char: Jane\n\n"
                  "% this is a comment\n\n"
                  "This is a story about Jane Smith.\n\n"
                  "Well, not really. She's still awesome though.\n"))
    # Whole document
    cC, wC, pC = theIndex.getCounts(cHandle)
    assert cC == 152
    assert wC == 28
    assert pC == 4

    # First part
    cC, wC, pC = theIndex.getCounts(cHandle, "T000001")
    assert cC == 62
    assert wC == 12
    assert pC == 2

    # Second part
    cC, wC, pC = theIndex.getCounts(cHandle, "T000011")
    assert cC == 90
    assert wC == 16
    assert pC == 2

    # Novel Stats
    # ===========

    hHandle = theProject.newFile("Chapter", nwItemClass.NOVEL, "a508bb932959c")
    sHandle = theProject.newFile("Scene One", nwItemClass.NOVEL,
                                 "a508bb932959c")
    tHandle = theProject.newFile("Scene Two", nwItemClass.NOVEL,
                                 "a508bb932959c")

    theProject.projTree[hHandle].itemLayout == nwItemLayout.CHAPTER
    theProject.projTree[sHandle].itemLayout == nwItemLayout.SCENE
    theProject.projTree[tHandle].itemLayout == nwItemLayout.SCENE

    assert theIndex.scanText(hHandle, "## Chapter One\n\n")
    assert theIndex.scanText(sHandle, "### Scene One\n\n")
    assert theIndex.scanText(tHandle, "### Scene Two\n\n")

    assert theIndex._listNovelHandles(False) == [
        nHandle, hHandle, sHandle, tHandle
    ]
    assert theIndex._listNovelHandles(True) == [hHandle, sHandle, tHandle]

    # Add a fake handle to the tree and check that it's ignored
    theProject.projTree._treeOrder.append("0000000000000")
    assert theIndex._listNovelHandles(False) == [
        nHandle, hHandle, sHandle, tHandle
    ]
    theProject.projTree._treeOrder.remove("0000000000000")

    # Extract stats
    assert theIndex.getNovelWordCount(False) == 34
    assert theIndex.getNovelWordCount(True) == 6
    assert theIndex.getNovelTitleCounts(False) == [0, 2, 1, 2, 0]
    assert theIndex.getNovelTitleCounts(True) == [0, 0, 1, 2, 0]

    # Table of Contents
    assert theIndex.getTableOfContents(0, True) == []
    assert theIndex.getTableOfContents(1, True) == []
    assert theIndex.getTableOfContents(2, True) == [
        ("%s:T000001" % hHandle, 2, "Chapter One", 6),
    ]
    assert theIndex.getTableOfContents(3, True) == [
        ("%s:T000001" % hHandle, 2, "Chapter One", 2),
        ("%s:T000001" % sHandle, 3, "Scene One", 2),
        ("%s:T000001" % tHandle, 3, "Scene Two", 2),
    ]

    assert theIndex.getTableOfContents(0, False) == []
    assert theIndex.getTableOfContents(1, False) == [
        ("%s:T000001" % nHandle, 1, "Hello World!", 12),
        ("%s:T000011" % nHandle, 1, "Hello World!", 22),
    ]

    # Header Word Counts
    bHandle = "0000000000000"
    assert theIndex.getHandleWordCounts(bHandle) == []
    assert theIndex.getHandleWordCounts(hHandle) == [("%s:T000001" % hHandle,
                                                      2)]
    assert theIndex.getHandleWordCounts(sHandle) == [("%s:T000001" % sHandle,
                                                      2)]
    assert theIndex.getHandleWordCounts(tHandle) == [("%s:T000001" % tHandle,
                                                      2)]
    assert theIndex.getHandleWordCounts(nHandle) == [
        ("%s:T000001" % nHandle, 12), ("%s:T000011" % nHandle, 16)
    ]

    assert theProject.closeProject()

    # Header Record
    bHandle = "0000000000000"
    assert theIndex.getHandleHeaders(bHandle) == []
    assert theIndex.getHandleHeaders(hHandle) == [("T000001", "H2",
                                                   "Chapter One")]
    assert theIndex.getHandleHeaders(sHandle) == [("T000001", "H3",
                                                   "Scene One")]
    assert theIndex.getHandleHeaders(tHandle) == [("T000001", "H3",
                                                   "Scene Two")]
    assert theIndex.getHandleHeaders(nHandle) == [
        ("T000001", "H1", "Hello World!"), ("T000011", "H1", "Hello World!")
    ]