def testLinkExtract(self):
    """Run link extraction on the test page and print what was extracted.

    Builds a ScrapyExtractor from 'linkExtractionTest.yml', fetches the page
    at ``self.urlAddress`` through ``self.urlUtil`` and feeds the resulting
    HtmlResponse, together with a fresh LinkArrayItem, to the extractor.
    """
    # Extractor driven by the link-extraction configuration file.
    linkExtractor = ScrapyExtractor('linkExtractionTest.yml', self.urlUtil)

    # Item handed to the extractor alongside the response.
    linkItem = LinkArrayItem()
    linkItem.init()

    # Download the page body and wrap it in an HtmlResponse for the extractor.
    pageBody = self.urlUtil.getUrlResponse(self.urlAddress)
    pageResponse = HtmlResponse(self.urlAddress, body=pageBody)

    # NOTE(review): the result is only printed, never compared against
    # reference data, so this test cannot fail on wrong output -- confirm
    # whether an expected-links fixture should be asserted here.
    print(linkExtractor.extract(pageResponse, linkItem))
def testImageExtract(self):
    """Check image extraction from the test page against stored reference data.

    Builds a ScrapyExtractor from 'imageExtractionTest.yml', fetches the page
    at ``self.urlAddress`` through ``self.urlUtil``, runs the extractor on the
    resulting HtmlResponse with a fresh ImageArrayItem, and compares the
    result with the JSON content of the 'correctImageData' file.

    Raises:
        AssertionError: if the extracted data differs from the reference data.
    """
    # Initialize the extractor from the image-extraction configuration.
    configFile = 'imageExtractionTest.yml'
    extractor = ScrapyExtractor(configFile, self.urlUtil)

    # Create the image item handed to the extractor.
    item = ImageArrayItem()
    item.init()

    # Download the page body and wrap it in an HtmlResponse for the extractor.
    bodyForResponse = self.urlUtil.getUrlResponse(self.urlAddress)
    response = HtmlResponse(self.urlAddress, body=bodyForResponse)

    # Extract images from the HtmlResponse object.
    extractedData = extractor.extract(response, item)

    # Load the reference data; the context manager closes the file handle
    # even if JSON parsing fails.
    with open('correctImageData', 'r') as referenceFile:
        trueData = json.load(referenceFile)

    # assertEqual reports the differing values on failure, unlike
    # assertTrue(a == b) which only says "False is not true".
    self.assertEqual(extractedData, trueData)