def test_content_only_from_obj():
    """Check include/exclude filtering of ``captureContent`` on the test page.

    The page comes from ``dateFinder.buildTestHtml()`` and is parsed once.
    ``captureContent`` is then run with several combinations of include
    terms, exclude terms and the ``andOr`` mode, and the number of captured
    items is compared with the expected count.  Finally ``capture_main`` is
    run on the raw page with the last combination and the size of each
    returned section is checked.
    """
    capturer = CaptureContent()
    page = dateFinder.buildTestHtml()
    parsed_page = BeautifulSoup(page)
    shortest_allowed = 10

    # The same list objects are shared between scenarios, exactly as the
    # filters are reused from one call to the next.
    both_terms = ["dogma", "karma"]
    dogma_only = ["dogma"]
    nothing_excluded = []
    without_fang = ["fang"]

    # (include, exclude, andOr, expected number of captured items)
    scenarios = (
        # one html object holds both 'dogma' and 'karma' ...
        (both_terms, nothing_excluded, "and", 1),
        # ... three hold at least one of them
        (both_terms, nothing_excluded, "or", 3),
        # three mention 'dogma' ...
        (dogma_only, nothing_excluded, "or", 3),
        # ... and one is left once mentions of 'fang' are excluded
        (dogma_only, without_fang, "or", 1),
    )
    for wanted, unwanted, mode, expected_count in scenarios:
        captured = capturer.captureContent(
            parsed_page, wanted, unwanted, minLength=shortest_allowed, andOr=mode
        )
        assert len(captured) == expected_count

    # make sure when run from the main routine, the expected information is returned
    outcome = capturer.capture_main(
        name=page,
        streamType="o",
        include=dogma_only,
        exclude=without_fang,
        minLength=shortest_allowed,
        andOr="or",
    )
    expected_sizes = (
        ("polishedCont", 1),
        ("contentAsSoupObjects", 1),
        ("metaData", 2),
        ("links", 2),
    )
    for section, size in expected_sizes:
        assert len(outcome[section]) == size
def test_main_with_obj():
    """Run ``capture_main`` on the generated test page with positional arguments.

    The page comes from ``dateFinder.buildTestHtml()`` and is handed to
    ``CaptureContent.capture_main`` with stream type ``"o"``, requiring both
    'dogma' and 'karma' (``andOr="and"``), no exclusions, and
    ``socket_timeout=None``.

    The test checks that the call completes and that the result exposes the
    sections the sibling test reads from ``capture_main`` output.
    """
    clsObj = CaptureContent()
    name = dateFinder.buildTestHtml()
    exclude = []
    minLength = 10
    streamType = "o"

    # require both 'dogma' and 'karma' to be present in a captured object
    andOr = "and"
    include = ["dogma", "karma"]
    results = clsObj.capture_main(
        name, streamType, include, exclude, minLength, andOr, socket_timeout=None
    )

    # Without an assertion this test could only fail if capture_main raised;
    # verify that the expected result sections are actually returned.
    for key in ("polishedCont", "contentAsSoupObjects", "metaData", "links"):
        assert key in results