def _crawl(crawler: Crawler, args: argparse.Namespace) -> int:
    """Crawl using the provided crawler.

    Args:
        crawler: The crawler object.
        args: The command line arguments.

    Returns:
        0 on success, else 1.
    """
    failure_occurred = False
    try:
        crawler.crawl()
        _print_dead_links(crawler.dead_links)
    except CrawlerException as exception:
        logger.error(str(exception))
        failure_occurred = True
    except Exception as exception:
        # Broad except is deliberate at this top-level CLI boundary: it turns
        # any unexpected failure into a clean error message instead of a raw
        # traceback.
        failure_occurred = True
        logger.error("Error occurred while crawling")
        if args.show_exception_tb:  # Traceback is opt-in to keep output clean.
            logger.exception(exception)
    return 1 if failure_occurred else 0
class ConverterTest(unittest.TestCase):
    """Smoke test: a freshly constructed Crawler's crawl() yields a list."""

    def setUp(self):
        # One crawler instance per test, built in the standard fixture hook.
        self.crawler = Crawler()

    def test_crawl(self):
        results = self.crawler.crawl()
        self.assertIsInstance(results, list)


if __name__ == '__main__':
    unittest.main()
def start():
    """Crawl all attribute prices and print each converted value.

    Iterates over the crawler's result dicts and prints the attribute name
    alongside its USD price converted via Converter.
    """
    crawler = Crawler()
    crawler_results = crawler.crawl()
    for crawler_result in crawler_results:
        # NOTE(review): result keys assumed per the original code; values may
        # be None if a key is missing (dict.get default) — confirm upstream.
        attribute_string = crawler_result.get('attribute_string')
        attribute_usd_price = crawler_result.get('attribute_usd_price')
        attribute = crawler_result.get('attribute')
        converter = Converter()
        # Fixed: Python 2 `print` statement -> Python 3 print() function
        # (rest of the codebase uses Python 3 syntax, e.g. type hints).
        print(attribute, converter.convert(attribute_usd_price, attribute_string))
import argparse

from src import settings
from src.api import BildungsserverFeed, LocalXmlFeed, LocalRssFeed
from src.crawler import Crawler, SiemensCrawler, BildungsserverCrawler
from src.exceptions import ConfigurationError

if __name__ == '__main__':
    # Select the crawler implementation from settings without rebinding
    # (shadowing) the imported `Crawler` name.
    # Guard against CRAWLER being None so a missing setting raises
    # ConfigurationError instead of AttributeError on .lower().
    crawler_name = (settings.CRAWLER or '').lower()
    if crawler_name == 'bildungsserver':
        crawler_class = BildungsserverCrawler
    elif crawler_name == 'siemens-stiftung':
        crawler_class = SiemensCrawler
    else:
        # Fixed message: the old text claimed the setting was unset even when
        # it held an unrecognized value.
        raise ConfigurationError(
            "settings.CRAWLER must be 'bildungsserver' or 'siemens-stiftung'."
        )
    crawler = crawler_class(dry_run=settings.DRY_RUN)
    crawler.crawl()