def getDocumentTokens(self, doc):
     """
         Count the non-stopword terms that occur in a document.

         The document text is fetched with doc.getFullDocumentText(True, False),
         lower-cased, split with tokenizeText(), and every token found in
         local_stopwords_list is discarded before counting.

         Args:
             doc: document object exposing getFullDocumentText()

         Returns:
             whatever getDictOfTokenCounts() builds from the surviving tokens
             (presumably a dict mapping token -> count; confirm against helper)
     """
     lowered_text=doc.getFullDocumentText(True,False).lower()
     # Stopword removal is a deliberate shortcut (flagged as a hack by the
     # original author): it only consults the local list.
     kept_tokens=[
         term for term in tokenizeText(lowered_text)
         if term not in local_stopwords_list
     ]
     return getDictOfTokenCounts(kept_tokens)
    def textFormatting(self, text, glob):
        """
            Text formatting callback meant to be handed to
            JsonDoc.prettyPrintDocumentHTML().

            Every token of the sentence is wrapped in a <span> whose term_id
            attribute holds the token's id and whose class attribute holds that
            id followed by the token's references. Tokens missing from
            self.term_info (looked up by their lower-cased form) get id 0 and
            no references.

            Args:
                text: text of sentence
                glob: globals dictionary (not used by this method)

            Returns:
                the generated <span> elements joined by single spaces, with
                surrounding whitespace and "." characters stripped
        """
        # Entry used for tokens that have no record in self.term_info.
        unknown_term={"token_id":0, "references":[]}
        spans=[]
        for word in tokenizeText(text.strip(".")):
            info=self.term_info.get(word.lower(), unknown_term)
            joined_refs=" ".join(info["references"])
            term_id=str(info["token_id"])
            css_classes=term_id + " " + joined_refs
            # NOTE(review): the token is interpolated verbatim (no HTML
            # escaping) -- confirm whether the caller escapes the result.
            spans.append('<span term_id="%s" class="%s">%s</span>' %
                (term_id,css_classes,word))

        return " ".join(spans).strip().strip(".")