def test_run_register_metrics(self):
    """run() must set up the pipeline, register performance stats when
    metrics are enabled, run the pipeline, and keep the registered
    stats object on the engine."""
    from baskerville.models.engine import BaskervilleAnalyticsEngine
    with mock.patch.object(
            BaskervilleAnalyticsEngine, '_set_up_pipeline'
    ) as setup_mock, mock.patch.object(
            BaskervilleAnalyticsEngine, '_register_performance_stats'
    ) as register_mock:
        fake_pipeline = mock.MagicMock()
        setup_mock.return_value = fake_pipeline
        register_mock.return_value = \
            'should return a performance_stats instance'
        engine = BaskervilleAnalyticsEngine(
            RunType.kafka, self.test_config, register_metrics=True
        )
        # 'performance' is completely mocked; it only needs to be truthy
        # so run() steps into the register_performance_stats branch
        engine.config.engine.metrics = MetricsConfig({'performance': True})
        engine.run()
        setup_mock.assert_called_once()
        register_mock.assert_called_once()
        fake_pipeline.run.assert_called_once()
        self.assertTrue(
            engine.performance_stats ==
            'should return a performance_stats instance'
        )
def test__set_up_pipeline_auto_spark(self):
    """For the kafka run type with spark enabled, _set_up_pipeline must
    construct and return a KafkaPipeline."""
    from baskerville.models.pipelines import KafkaPipeline
    with mock.patch.object(KafkaPipeline, '__init__') as init_mock:
        from baskerville.models.engine import BaskervilleAnalyticsEngine
        # stub out the real constructor; MagicMock would otherwise
        # return a MagicMock from __init__, which is illegal
        init_mock.return_value = None
        engine = BaskervilleAnalyticsEngine(RunType.kafka, self.test_config)
        self.assertTrue(engine.run_type == RunType.kafka)
        engine.config.engine.use_spark = True
        pipeline = engine._set_up_pipeline()
        init_mock.assert_called_once()
        self.assertTrue(isinstance(pipeline, KafkaPipeline))
def test__set_up_pipeline_manual_raw_logs_path_spark(self):
    """For the rawlog run type with a manual raw_logs_path and spark
    enabled, _set_up_pipeline must construct and return a RawLogPipeline."""
    from baskerville.models.pipelines import RawLogPipeline
    with mock.patch.object(RawLogPipeline, '__init__') as init_mock:
        from baskerville.models.engine import BaskervilleAnalyticsEngine
        # stub out the real constructor so no pipeline wiring happens
        init_mock.return_value = None
        engine = BaskervilleAnalyticsEngine(RunType.rawlog, self.test_config)
        self.assertTrue(engine.run_type == RunType.rawlog)
        manual_conf = engine.config.engine.manual
        manual_conf.host = None
        manual_conf.raw_logs_path = 'some_path'
        manual_conf.chunk_size = 0
        engine.config.engine.use_spark = True
        pipeline = engine._set_up_pipeline()
        init_mock.assert_called_once()
        self.assertTrue(isinstance(pipeline, RawLogPipeline))
def main():
    """
    Baskerville commandline entry point: parses arguments, builds the
    engine, optionally starts the simulation / Prometheus exporter /
    test-model seeding, then runs the engine.

    :return: None

    BUG FIX: the original registered "-t"/"--testmodel" twice;
    argparse raises ArgumentError ("conflicting option strings") on the
    duplicate add_argument call, so main() crashed at startup. The
    option is now added exactly once.
    """
    global baskerville_engine, logger
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "pipeline",
        help="Pipeline to use: es, rawlog, or kafka",
    )
    parser.add_argument(
        "-s", "--simulate",
        dest="simulate",
        action="store_true",
        help="Simulate real-time run using kafka",
    )
    parser.add_argument(
        "-e", "--startexporter",
        dest="start_exporter",
        action="store_true",
        help="Start the Baskerville Prometheus exporter at the specified "
             "in the configuration port",
    )
    parser.add_argument(
        "-t", "--testmodel",
        dest="test_model",
        help="Add a test model in the models table",
        default=False,
        action="store_true"
    )
    parser.add_argument(
        "-c", "--conf",
        action="store",
        dest="conf_file",
        default=os.path.join(src_dir, '..', 'conf', 'baskerville.yaml'),
        help="Path to config file"
    )
    args = parser.parse_args()
    conf = parse_config(path=args.conf_file)

    baskerville_engine = BaskervilleAnalyticsEngine(
        args.pipeline, conf, register_metrics=args.start_exporter
    )
    logger = get_logger(
        __name__,
        logging_level=baskerville_engine.config.engine.log_level,
        output_file=baskerville_engine.config.engine.logpath
    )

    # start simulation if specified
    if args.simulate:
        spark = None
        if baskerville_engine.config.engine.use_spark:
            # imported lazily: a spark session is only needed when
            # simulating with spark enabled
            from baskerville.spark import get_spark_session
            spark = get_spark_session()
        logger.info('Starting simulation...')
        run_simulation(baskerville_engine.config, spark)

    # start baskerville prometheus exporter if specified
    if args.start_exporter:
        if not baskerville_engine.config.engine.metrics:
            raise RuntimeError('Cannot start exporter without metrics config')
        port = baskerville_engine.config.engine.metrics.port
        start_http_server(port)
        logger.info(f'Starting Baskerville Exporter at '
                    f'http://localhost:{port}')

    # populate with test data if specified
    if args.test_model:
        add_model_to_database(conf['database'])

    # start helper processes in reverse registration order, then the engine
    for p in PROCESS_LIST[::-1]:
        print(f"{p.name} starting...")
        p.start()

    logger.info('Starting Baskerville Engine...')
    baskerville_engine.run()
def test_register_performance_stats(self):
    """_register_performance_stats must register a timer metric for every
    configured pipeline method, request_set_cache method and active
    feature of the pipeline."""
    from baskerville.models.engine import BaskervilleAnalyticsEngine
    with mock.patch.object(BaskervilleAnalyticsEngine, '_set_up_pipeline') as _:
        engine = BaskervilleAnalyticsEngine(RunType.kafka, self.test_config)
        pipeline_mock = mock.MagicMock()
        feature_a = mock.MagicMock()
        feature_b = mock.MagicMock()
        feature_a.feature_name = 'mock_feature1'
        feature_b.feature_name = 'mock_feature2'
        feature_a.compute = lambda a: a
        feature_b.compute = lambda a: a
        pipeline_mock.feature_manager = mock.MagicMock()
        pipeline_mock.feature_manager.active_features = [
            feature_a, feature_b
        ]
        # the registration code reads __name__ off the wrapped methods
        pipeline_mock.test_method_name_1.__name__ = 'test_method_name_1'
        pipeline_mock.test_method_name_2.__name__ = 'test_method_name_2'
        pipeline_mock.request_set_cache.test_method_name_3.__name__ = \
            'test_method_name_3'
        pipeline_mock.request_set_cache.test_method_name_4.__name__ = \
            'test_method_name_4'
        engine.pipeline = pipeline_mock
        engine.config.engine.metrics = MetricsConfig({
            'performance': {
                'pipeline': ['test_method_name_1', 'test_method_name_2'],
                'request_set_cache': ['test_method_name_3',
                                      'test_method_name_4'],
                'features': True,
            }
        })
        stats = engine._register_performance_stats()
        prefix = stats._prefix
        expected_keys = [
            f'{prefix}timer_for_pipeline_test_method_name_1',
            f'{prefix}timer_for_pipeline_test_method_name_2',
            f'{prefix}timer_for_request_set_cache_test_method_name_3',
            f'{prefix}timer_for_request_set_cache_test_method_name_4',
            f'{prefix}timer_for_feature_{feature_a.feature_name}',
            f'{prefix}timer_for_feature_{feature_b.feature_name}',
        ]
        for key in expected_keys:
            self.assertTrue(key in stats.registry)
def test_instance(self, ):
    """Constructing the engine with the rawlog run type must record that
    run type on the instance."""
    from baskerville.models.engine import BaskervilleAnalyticsEngine
    engine = BaskervilleAnalyticsEngine(RunType.rawlog, self.test_config)
    self.assertTrue(RunType.rawlog == engine.run_type)