def get_text(text):
    """Return the text of a document, excluding tags and comments.

    A fresh ``myHTMLParser.myHTMLParser`` is created for every call, fed
    the whole of ``text`` in one go, and then asked for its filtered
    output. The filtering itself is implemented by the parser class, not
    by this function.

    Args:
        text: Raw document markup to hand to the parser.

    Returns:
        Whatever the parser's ``returnFiltered()`` yields after consuming
        ``text``.
    """
    parser = myHTMLParser.myHTMLParser()
    parser.feed(text)
    return parser.returnFiltered()
def get_docID(text):
    """Return the docID of a document.

    A fresh ``myHTMLParser.myHTMLParser`` is created for every call, fed
    the whole of ``text`` in one go, and then asked for the docID. How
    the docID is located is decided by the parser class, not by this
    function.

    Args:
        text: Raw document markup to hand to the parser.

    Returns:
        Whatever the parser's ``returnDocID()`` yields after consuming
        ``text``.
    """
    parser = myHTMLParser.myHTMLParser()
    parser.feed(text)
    return parser.returnDocID()