def test_service_configuration_load(self):
    """Load two valid configuration fixtures and verify the parsed fields.

    Each fixture is written to disk by the corresponding
    ``write_valid_*`` helper before ``ServiceConfiguration.load`` runs.
    A load failure (InvalidConfigurationException) fails the test with
    the exception as the message.
    """
    # First valid fixture: crawler fields plus a cache profile-page pattern.
    # (The original comment called this a "mysql configuration", but nothing
    # database-related is asserted here -- the comment was misleading.)
    config = ServiceConfiguration()
    TestServiceConfiguration.write_valid_1()
    try:
        config.load()
        self._assert_valid_crawler_fields(config)
        self.assertEqual(config.cache.profile_page_pattern,
                         "www1.in.tum.de/:firstname-:lastname")
    except InvalidConfigurationException as e:
        self.fail(e)

    # Second valid fixture: sqlite engine with empty path and pattern.
    config = ServiceConfiguration()
    TestServiceConfiguration.write_valid_2()
    try:
        config.load()
        self._assert_valid_crawler_fields(config)
        self.assertEqual(config.database.engine, "sqlite")
        self.assertEqual(config.database.path, "")
        self.assertEqual(config.cache.profile_page_pattern, "")
    except InvalidConfigurationException as e:
        self.fail(e)

def _assert_valid_crawler_fields(self, config):
    """Assert the crawler fields shared by both valid fixtures.

    Extracted to remove the verbatim duplication of these three
    assertions in ``test_service_configuration_load``.
    """
    self.assertEqual(config.crawler.app_id, "231209")
    self.assertEqual(config.crawler.app_secret, "AlPhA4NuMeRiC20")
    self.assertEqual(config.crawler.research_group,
                     "d0b7f41f-ad37-3b47-ab70-9feac35557cc")
class MendeleyCache(Flask):
    """Flask application that wires together the service configuration,
    data access, the crawler pipeline and the HTTP routing controllers."""

    def __init__(self, *args, **kwargs):
        super(MendeleyCache, self).__init__(*args, **kwargs)

        # Read configuration
        self.configuration = ServiceConfiguration()
        self.configuration.load()
        log.info("Configuration has been loaded")

        # Create service controllers and ensure the database schema exists
        self.data_controller = DataController(self.configuration.database)
        self.data_controller.assert_schema()
        log.info("Schema has been checked")

        # Create crawler based on configuration
        self.crawler = None  # :type: AbstractCrawler
        if not self.configuration.uses_mendeley:
            log.info("Pipeline uses FileCrawler")
            self.crawler = FileCrawler()
        else:
            # Imported lazily so the SDK is only required when enabled.
            from mendeleycache.crawler.sdk_crawler import SDKCrawler
            # BUG FIX: the original called .format(app_id=..., app_secret=...)
            # on a placeholder-free string -- a no-op that would also have
            # leaked the app secret into the log had placeholders been added.
            log.info("Pipeline uses SDKCrawler")
            self.crawler = SDKCrawler(
                app_id=self.configuration.crawler.app_id,
                app_secret=self.configuration.crawler.app_secret
            )

        # Create the pipeline
        self.crawl_controller = CrawlController(
            self.crawler, self.configuration.crawler.research_group)
        self.analysis_controller = AnalysisController()
        self.pipeline_controller = PipelineController(
            data_controller=self.data_controller,
            crawl_controller=self.crawl_controller,
            analysis_controller=self.analysis_controller,
        )
        log.info("Pipeline has been initialized")

        # Create the routing controllers
        self.fields_controller = FieldsController(self, self.data_controller)
        self.profiles_controller = ProfilesController(
            self, self.data_controller, self.configuration.cache)
        self.publications_controller = DocumentsController(self, self.data_controller)
        self.cache_controller = CacheController(
            self, self.data_controller, self.pipeline_controller, self.configuration
        )
        self.root_controller = RootController(
            self, self.data_controller, self.configuration)

        # Register the routes
        self.register_routes()
        log.info("Routes have been registered")
        log.info("MendeleyCache has been initialized")

    def register_routes(self):
        """Register every routing controller's endpoints on this app."""
        self.fields_controller.register()
        self.profiles_controller.register()
        self.publications_controller.register()
        self.cache_controller.register()
        self.root_controller.register()
# NOTE(review): this is a second, near-identical definition of MendeleyCache
# (the earlier one above is shadowed at import time). It looks like two
# versions of the same file were concatenated -- confirm and delete one.
class MendeleyCache(Flask):
    """Flask application wiring configuration, data access, the crawler
    pipeline and the routing controllers together."""

    def __init__(self, *args, **kwargs):
        super(MendeleyCache, self).__init__(*args, **kwargs)

        # Read configuration
        self.configuration = ServiceConfiguration()
        self.configuration.load()
        log.info("Configuration has been loaded")

        # Create service controllers and ensure the schema is present
        self.data_controller = DataController(self.configuration.database)
        self.data_controller.assert_schema()
        log.info("Schema has been checked")

        # Create crawler based on configuration
        self.crawler = None  # :type: AbstractCrawler
        if not self.configuration.uses_mendeley:
            log.info("Pipeline uses FileCrawler")
            self.crawler = FileCrawler()
        else:
            # Imported lazily so the SDK is only required when enabled.
            from mendeleycache.crawler.sdk_crawler import SDKCrawler
            # BUG FIX: the original called .format(app_id=..., app_secret=...)
            # on a string without placeholders -- a no-op that risked logging
            # the app secret if placeholders were ever introduced.
            log.info("Pipeline uses SDKCrawler")
            self.crawler = SDKCrawler(
                app_id=self.configuration.crawler.app_id,
                app_secret=self.configuration.crawler.app_secret)

        # Create the pipeline
        self.crawl_controller = CrawlController(
            self.crawler, self.configuration.crawler.research_group)
        self.analysis_controller = AnalysisController()
        self.pipeline_controller = PipelineController(
            data_controller=self.data_controller,
            crawl_controller=self.crawl_controller,
            analysis_controller=self.analysis_controller)
        log.info("Pipeline has been initialized")

        # Create the routing controllers
        self.fields_controller = FieldsController(self, self.data_controller)
        self.profiles_controller = ProfilesController(
            self, self.data_controller, self.configuration.cache)
        self.publications_controller = DocumentsController(
            self, self.data_controller)
        self.cache_controller = CacheController(
            self, self.data_controller, self.pipeline_controller,
            self.configuration)
        self.root_controller = RootController(
            self, self.data_controller, self.configuration)

        # Register the routes
        self.register_routes()
        log.info("Routes have been registered")
        log.info("MendeleyCache has been initialized")

    def register_routes(self):
        """Register every routing controller's endpoints on this app."""
        self.fields_controller.register()
        self.profiles_controller.register()
        self.publications_controller.register()
        self.cache_controller.register()
        self.root_controller.register()
# Prepare loader = TestLoader() runner = unittest.TextTestRunner(verbosity=2) # Create suites all = loader.discover(start_dir=project_root) # Run suites runner.run(all) elif command == "prepare": log.info("Preparing environment for gunicorn workers") # Read configuration configuration = ServiceConfiguration() configuration.load() log.info("Configuration has been loaded") # Create data controller and assert schema # That will remove the race conditions of the gunicorn worker if it's done on every startup data_controller = DataController(configuration.database) data_controller.assert_schema() # Pipeline runner elif command == "pipeline": config = ServiceConfiguration() config.load() data_controller = DataController(config.database) if not data_controller.is_initialized(): log.critical("Database is not initialized")