def testLinkExtract(self):
    """Testing for recursive link extraction from given page and following it.

    Builds a ScrapyExtractor from 'linkExtractionTest.yml', fetches the body
    of self.urlAddress through self.urlUtil, wraps it in an HtmlResponse and
    prints whatever the extractor returns for a fresh LinkArrayItem.

    NOTE(review): this variant performs no assertion, so it only fails when
    one of the calls below raises.
    """
    # Initialize the extractor.
    # NOTE(review): the config path is relative, so it is presumably resolved
    # against the current working directory -- confirm how the test is run.
    configFile = 'linkExtractionTest.yml'
    extractor = ScrapyExtractor(configFile, self.urlUtil)

    # Create a link item
    item = LinkArrayItem()
    item.init()

    # Create an HtmlResponse object for performing XPath operations on it
    bodyForResponse = self.urlUtil.getUrlResponse(self.urlAddress)
    response = HtmlResponse(self.urlAddress, body=bodyForResponse)

    # Extract links from the HtmlResponse object
    extractedData = extractor.extract(response, item)

    # Show the extracted data. The parenthesized single-argument form prints
    # the same text under the Python 2 print statement and is also valid
    # Python 3 syntax.
    print(extractedData)
def testLinkExtract(self):
    """Testing for recursive link extraction from given page and following it.

    Builds a ScrapyExtractor from the 'linkExtractionTest.yml' file located
    next to this module, fetches the body of self.urlAddress through
    self.urlUtil, wraps it in an HtmlResponse, runs the extractor on a fresh
    LinkArrayItem and compares the result with the JSON content of the
    'correctLinkData' file located next to this module.
    """
    module_logger.debug('method testLinkExtract was called')

    # Resolve fixture files relative to this module. os.path.join is used
    # instead of string concatenation so that an empty dirname does not turn
    # the fixture name into a root-anchored path such as '/correctLinkData'.
    testDir = os.path.dirname(__file__)

    # Initialize the extractor
    configFile = os.path.join(testDir, 'linkExtractionTest.yml')
    extractor = ScrapyExtractor(configFile, self.urlUtil)

    # Create a link item
    item = LinkArrayItem()
    item.init()

    # Create an HtmlResponse object for performing XPath operations on it
    bodyForResponse = self.urlUtil.getUrlResponse(self.urlAddress)
    response = HtmlResponse(self.urlAddress, body=bodyForResponse)

    # Extract links from the HtmlResponse object
    extractedData = extractor.extract(response, item)

    # Load the correct data; the context manager closes the file handle even
    # if JSON parsing raises.
    with open(os.path.join(testDir, 'correctLinkData'), 'r') as trueDataFile:
        trueData = json.load(trueDataFile)

    # assertEqual passes/fails on the same == comparison as before, but
    # reports both values when they differ.
    self.assertEqual(extractedData, trueData)

    module_logger.debug('method testLinkExtract completed successfully')