示例#1
0
def testCoreTools_CountWords():
    """Test the word counter and the exclusion filters.

    The sample holds four headings, a @tag line, a % comment line,
    three plain paragraphs, and one paragraph using en and em dashes.
    The expected word total only adds up if the tag and comment lines
    are left out and the dashes act as word separators.
    """
    sampleLines = [
        "# Heading One",
        "## Heading Two",
        "### Heading Three",
        "#### Heading Four",
        "",
        "@tag: value",
        "",
        "% A comment that should n ot be counted.",
        "",
        "The first paragraph.",
        "",
        "The second paragraph.",
        "",
        "",
        "The third paragraph.",
        "",
        "Dashes\u2013and even longer\u2014dashes.",
    ]
    # Joined rather than terminated, so the last line has no line break
    sampleText = "\n".join(sampleLines)

    charCount, wordCount, paraCount = countWords(sampleText)

    assert (charCount, wordCount, paraCount) == (138, 22, 4)
示例#2
0
 def _indexWordCounts(self, tHandle, isNovel, theText, nTitle):
     """Count text stats and save the counts to the index.

     The character, word and paragraph counts of theText are written
     to the heading entry keyed by nTitle under tHandle. The novel
     index is used when isNovel is true, and the note index otherwise.
     If the handle or the heading entry is missing from that index,
     nothing is stored.
     """
     cC, wC, pC = countWords(theText)
     sTitle = "T%06d" % nTitle

     # Both indices are updated the same way, so select one up front
     theIndex = self._novelIndex if isNovel else self._noteIndex
     if tHandle in theIndex and sTitle in theIndex[tHandle]:
         theEntry = theIndex[tHandle][sTitle]
         theEntry["cCount"] = cC
         theEntry["wCount"] = wC
         theEntry["pCount"] = pC
         theEntry["updated"] = round(time())

     return
示例#3
0
def testCountWords():
    """Test the word counter and the exclusion filters.

    The sample holds four headings, a @tag line, a % comment line and
    three plain paragraphs, with every line ending in a line break.
    The expected word total only adds up if the tag and comment lines
    are left out of the count.
    """
    sampleLines = (
        "# Heading One",
        "## Heading Two",
        "### Heading Three",
        "#### Heading Four",
        "",
        "@tag: value",
        "",
        "% A comment that should n ot be counted.",
        "",
        "The first paragraph.",
        "",
        "The second paragraph.",
        "",
        "",
        "The third paragraph.",
    )
    # Every line, including the last one, is terminated by a line break
    sampleText = "".join(aLine + "\n" for aLine in sampleLines)

    nChars, nWords, nParas = countWords(sampleText)

    assert nChars == 108
    assert nWords == 17
    assert nParas == 3
示例#4
0
    def scanText(self, tHandle, theText):
        """Scan a piece of text associated with a handle and update the
        indices accordingly. The handle and the text are separate inputs
        since the text is primarily scanned before it is saved, unless
        the index is being rebuilt.

        Returns False when the item is skipped (unknown, not a file, no
        layout, orphaned, in trash, or archived), and True once the
        item has been indexed. The total text counts are recorded for
        trashed and archived items as well, since they are stored
        before those two checks are made.
        """
        projTree = self.theProject.projTree
        theItem = projTree[tHandle]
        theRoot = projTree.getRootItem(tHandle)

        # Items that cannot be indexed at all
        if theItem is None:
            logger.info("Not indexing unknown item %s" % tHandle)
            return False
        if theItem.itemType != nwItemType.FILE:
            logger.info("Not indexing non-file item %s" % tHandle)
            return False
        if theItem.itemLayout == nwItemLayout.NO_LAYOUT:
            logger.info("Not indexing no-layout item %s" % tHandle)
            return False
        if theItem.itemParent is None:
            logger.info("Not indexing orphaned item %s" % tHandle)
            return False

        # The counts for the full text are stored before the trash and
        # archive checks, so they are kept even if indexing is skipped
        nChars, nWords, nParas = countWords(theText)
        self._textCounts[tHandle] = [nChars, nWords, nParas]

        # Trashed and archived files are not indexed any further
        if projTree.isTrashRoot(theItem.itemParent):
            logger.info("Not indexing trash item %s" % tHandle)
            return False
        if theRoot.itemClass == nwItemClass.ARCHIVE:
            logger.info("Not indexing archived item %s" % tHandle)
            return False

        itemClass = theItem.itemClass
        itemLayout = theItem.itemLayout

        logger.debug("Indexing item with handle %s" % tHandle)

        # Start from a fresh reference entry holding only the dummy
        # heading T000000, which covers a file without any title
        self._refIndex[tHandle] = {
            "T000000": {
                "tags": [],
                "updated": round(time()),
            },
        }

        # A handle lives in exactly one of the novel and note indices
        isNovel = not (itemLayout == nwItemLayout.NOTE)
        if isNovel:
            self._novelIndex[tHandle] = {}
            self._noteIndex.pop(tHandle, None)
        else:
            self._novelIndex.pop(tHandle, None)
            self._noteIndex[tHandle] = {}

        # Drop the tags that point to this file. They are collected
        # first so the tag index is not altered while it is iterated.
        staleTags = [
            aTag for aTag in self._tagIndex
            if self._tagIndex[aTag][1] == tHandle
        ]
        for aTag in staleTags:
            self._tagIndex.pop(aTag)

        # Walk through the text, one line at a time. Line numbers start
        # at 1, and nTitle holds the line number of the latest heading,
        # or 0 as long as no heading has been seen.
        theLines = theText.splitlines()
        nTitle = 0
        for nLine, aLine in enumerate(theLines, start=1):
            if not aLine.strip():
                continue

            if aLine.startswith("#"):
                if self._indexTitle(tHandle, isNovel, aLine, nLine, itemLayout):
                    if nTitle > 0:
                        # A new heading closes the previous section
                        prevText = "\n".join(theLines[nTitle-1:nLine-1])
                        self._indexWordCounts(tHandle, isNovel, prevText, nTitle)
                    nTitle = nLine

            elif aLine.startswith("@"):
                self._indexNoteRef(tHandle, aLine, nLine, nTitle)
                self._indexTag(tHandle, aLine, nLine, nTitle, itemClass)

            elif aLine.startswith("%") and nTitle > 0:
                # A comment is only checked for a synopsis when it
                # appears under a heading
                theComment = aLine[1:].lstrip()
                if theComment[:9].lower() == "synopsis:":
                    self._indexSynopsis(tHandle, isNovel, theComment[9:].strip(), nTitle)

        if nTitle > 0:
            # Count words for the text from the last heading onwards
            tailText = "\n".join(theLines[nTitle-1:])
            self._indexWordCounts(tHandle, isNovel, tailText, nTitle)
        else:
            # Index a page with no titles, and count the full text
            self._indexPage(tHandle, isNovel, itemLayout)
            self._indexWordCounts(tHandle, isNovel, theText, nTitle)

        # Update timestamps for index changes
        timeStamp = round(time())
        self._timeIndex = timeStamp
        if isNovel:
            self._timeNovel = timeStamp
        else:
            self._timeNotes = timeStamp

        return True