def testCoreTools_CountWords():
    """Test the word counter and the exclusion filters.

    The sample text contains four heading levels, a tag line, a comment
    line, three plain paragraphs, and a final line whose words are
    joined by an en dash and an em dash.
    """
    sampleText = (
        "# Heading One\n"
        "## Heading Two\n"
        "### Heading Three\n"
        "#### Heading Four\n"
        "\n"
        "@tag: value\n"
        "\n"
        "% A comment that should n ot be counted.\n"
        "\n"
        "The first paragraph.\n"
        "\n"
        "The second paragraph.\n"
        "\n"
        "\n"
        "The third paragraph.\n"
        "\n"
        "Dashes\u2013and even longer\u2014dashes."
    )

    # countWords returns the character, word and paragraph counts
    charCount, wordCount, paraCount = countWords(sampleText)
    assert (charCount, wordCount, paraCount) == (138, 22, 4)
def _indexWordCounts(self, tHandle, isNovel, theText, nTitle):
    """Count the text stats and store them on an existing index entry.

    The entry is looked up by the handle and by the title key, which is
    "T" followed by nTitle zero-padded to six digits. The novel index is
    used when isNovel is true, otherwise the note index. If either the
    handle or the title key is missing from that index, nothing is
    written.
    """
    charCount, wordCount, paraCount = countWords(theText)
    titleKey = "T%06d" % nTitle

    # Both indices share the same layout, so pick one and update it
    targetIndex = self._novelIndex if isNovel else self._noteIndex
    if tHandle not in targetIndex:
        return
    if titleKey not in targetIndex[tHandle]:
        return

    titleEntry = targetIndex[tHandle][titleKey]
    titleEntry["cCount"] = charCount
    titleEntry["wCount"] = wordCount
    titleEntry["pCount"] = paraCount
    titleEntry["updated"] = round(time())

    return
def testCountWords():
    """Test the word counter and the exclusion filters.

    The sample text contains four heading levels, a tag line, a comment
    line, and three plain paragraphs.
    """
    sampleText = (
        "# Heading One\n"
        "## Heading Two\n"
        "### Heading Three\n"
        "#### Heading Four\n"
        "\n"
        "@tag: value\n"
        "\n"
        "% A comment that should n ot be counted.\n"
        "\n"
        "The first paragraph.\n"
        "\n"
        "The second paragraph.\n"
        "\n"
        "\n"
        "The third paragraph.\n"
    )

    # countWords returns the character, word and paragraph counts
    charCount, wordCount, paraCount = countWords(sampleText)
    assert (charCount, wordCount, paraCount) == (108, 17, 3)
def scanText(self, tHandle, theText):
    """Scan a piece of text associated with a handle and update the
    indices accordingly.

    The handle and the text are separate inputs because the text is
    primarily scanned before the file is saved, unless the index is
    being rebuilt.

    Returns True when the text was indexed and False when the item was
    skipped. Unknown, non-file, no-layout and orphaned items are skipped
    before anything is recorded. Trashed and archived items are skipped
    after their whole-text counts have been stored.
    """
    projTree = self.theProject.projTree
    theItem = projTree[tHandle]
    theRoot = projTree.getRootItem(tHandle)

    # Work out whether the item can be indexed at all
    skipMsg = None
    if theItem is None:
        skipMsg = "Not indexing unknown item %s"
    elif theItem.itemType != nwItemType.FILE:
        skipMsg = "Not indexing non-file item %s"
    elif theItem.itemLayout == nwItemLayout.NO_LAYOUT:
        skipMsg = "Not indexing no-layout item %s"
    elif theItem.itemParent is None:
        skipMsg = "Not indexing orphaned item %s"
    if skipMsg is not None:
        logger.info(skipMsg % tHandle)
        return False

    # The counts for the whole text are stored also for trashed and
    # archived items, which are otherwise left out of the index
    charCount, wordCount, paraCount = countWords(theText)
    self._textCounts[tHandle] = [charCount, wordCount, paraCount]

    if projTree.isTrashRoot(theItem.itemParent):
        logger.info("Not indexing trash item %s" % tHandle)
        return False
    if theRoot.itemClass == nwItemClass.ARCHIVE:
        logger.info("Not indexing archived item %s" % tHandle)
        return False

    itemClass = theItem.itemClass
    itemLayout = theItem.itemLayout

    logger.debug("Indexing item with handle %s" % tHandle)

    # Reset the reference index of the item, with a placeholder entry
    # T000000 in case the text has no title
    self._refIndex[tHandle] = {
        "T000000": {
            "tags": [],
            "updated": round(time()),
        },
    }

    # The item lives in exactly one of the novel and note indices, so
    # reset the one that applies and drop it from the other
    isNovel = itemLayout != nwItemLayout.NOTE
    if isNovel:
        self._novelIndex[tHandle] = {}
        self._noteIndex.pop(tHandle, None)
    else:
        self._novelIndex.pop(tHandle, None)
        self._noteIndex[tHandle] = {}

    # Drop the tags that currently point to this item. They are
    # collected first so the tag index is not modified while iterating.
    staleTags = [
        tagKey for tagKey, tagEntry in self._tagIndex.items()
        if tagEntry[1] == tHandle
    ]
    for tagKey in staleTags:
        self._tagIndex.pop(tagKey)

    # nTitle holds the 1-based line number of the most recent title,
    # and stays 0 until the first title has been found
    nTitle = 0
    theLines = theText.splitlines()
    for nLine, aLine in enumerate(theLines, start=1):
        if not aLine.strip():
            continue

        if aLine.startswith("#"):
            if self._indexTitle(tHandle, isNovel, aLine, nLine, itemLayout):
                if nTitle > 0:
                    # Count the previous section, from its title line
                    # up to the line before this title
                    sectionText = "\n".join(theLines[nTitle-1:nLine-1])
                    self._indexWordCounts(tHandle, isNovel, sectionText, nTitle)
                nTitle = nLine

        elif aLine.startswith("@"):
            self._indexNoteRef(tHandle, aLine, nLine, nTitle)
            self._indexTag(tHandle, aLine, nLine, nTitle, itemClass)

        elif aLine.startswith("%") and nTitle > 0:
            # A comment starting with "synopsis:", in any letter case,
            # is indexed as the synopsis of the current title
            commentText = aLine[1:].lstrip()
            if commentText[:9].lower() == "synopsis:":
                self._indexSynopsis(tHandle, isNovel, commentText[9:].strip(), nTitle)

    if nTitle > 0:
        # Count words for the remaining text after the last title
        sectionText = "\n".join(theLines[nTitle-1:])
        self._indexWordCounts(tHandle, isNovel, sectionText, nTitle)
    else:
        # Index a page with no titles, and count the whole text
        self._indexPage(tHandle, isNovel, itemLayout)
        self._indexWordCounts(tHandle, isNovel, theText, nTitle)

    # Update timestamps for index changes
    nowTime = round(time())
    self._timeIndex = nowTime
    if isNovel:
        self._timeNovel = nowTime
    else:
        self._timeNotes = nowTime

    return True