def test_parseImgLinks(self):
    """Verify ImageScrapy.parseImgLinks() extracts the expected set of image
    links from each canned HTML fixture page.

    Each entry in self._resources supplies the fixture file, the URL it is
    served at, and the expected set of parsed links.
    """
    httpretty.enable()
    try:
        for resource in self._resources:
            # Serve the fixture HTML at the resource's URL so no real
            # network I/O happens during the test.
            with open(os.path.join(HTTP_RESOURCES_PATH, resource["file"])) as html:
                httpretty.register_uri(httpretty.GET, resource["site"], body=html.read())
            imgs = ImageScrapy(resource["site"])
            links = imgs.parseImgLinks()
            self.assertSetEqual(links, resource["result"], "Test HTML page link parsing")
    finally:
        # Always restore real sockets, even when an assertion fails;
        # otherwise httpretty's monkey-patching leaks into later tests.
        httpretty.disable()
        httpretty.reset()
def site_scrape(site, depth=1):
    """Take a URL and scrape all the images found on it.

    Args:
        site: URL of the page to scrape images from.
        depth: link-following depth passed to parseImgLinks (default 1).
    """
    click.echo(" - About to scrape images from: {0}".format(site))
    scrap_images = ImageScrapy(site)
    dirName = scrap_images.filePath()
    img_list = scrap_images.parseImgLinks(depth)
    # Idiomatic truthiness check instead of len(...) — empty list is falsy.
    if img_list:
        click.echo(" Scraping collected images:")
        scrap_images.downloadImages(dirName, img_list)
    else:
        click.echo(" Images not found on this page")
def test_parseImgLinks(self):
    """Verify ImageScrapy.parseImgLinks() extracts the expected set of image
    links from each canned HTML fixture page.

    Each entry in self._resources supplies the fixture file, the URL it is
    served at, and the expected set of parsed links.
    """
    httpretty.enable()
    try:
        for resource in self._resources:
            # Serve the fixture HTML at the resource's URL so no real
            # network I/O happens during the test.
            with open(os.path.join(HTTP_RESOURCES_PATH, resource['file'])) as html:
                httpretty.register_uri(httpretty.GET, resource['site'], body=html.read())
            imgs = ImageScrapy(resource['site'])
            links = imgs.parseImgLinks()
            self.assertSetEqual(links, resource['result'], "Test HTML page link parsing")
    finally:
        # Always restore real sockets, even when an assertion fails;
        # otherwise httpretty's monkey-patching leaks into later tests.
        httpretty.disable()
        httpretty.reset()
def site_scrape(site, depth=1):
    """Take a URL and scrape all the images found on it.

    Args:
        site: URL of the page to scrape images from.
        depth: link-following depth passed to parseImgLinks (default 1).
    """
    click.echo(" - About to scrape images from: {0}".format(site))
    scrap_images = ImageScrapy(site)
    dirName = scrap_images.filePath()
    img_list = scrap_images.parseImgLinks(depth)
    # Idiomatic truthiness check instead of len(...) — empty list is falsy.
    if img_list:
        click.echo(" Scraping collected images:")
        scrap_images.downloadImages(dirName, img_list)
    else:
        click.echo(" Images not found on this page")
def test_instantiation(self):
    """Check that ImageScrapy can be constructed from a resource URL."""
    first_site = self._resources[0]['site']
    scraper = ImageScrapy(first_site)
    self.assertIsInstance(scraper, ImageScrapy, "Test ImageScrapy instance")