def test_training_infer(config):
    """Test anomaly detection training followed by inference on the public dataset."""
    model_adapter = SomModelAdapter(
        SomStorageAdapter(config=config, feedback_strategy=None))
    tc_train = SomTrainCommand(node_map=2, model_adapter=model_adapter, recreate_model=True)
    result, dist = tc_train.execute()
    assert result == 0
    model_adapter = SomModelAdapter(
        SomStorageAdapter(config=config, feedback_strategy=None))
    tc_infer = SomInferCommand(model_adapter=model_adapter, sleep=False)
    result = tc_infer.execute()
    assert result == 0

def test_model_shape(config):
    """Test that the trained model shape matches the given node_map parameter."""
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    result, dist = tc.execute()
    assert model_adapter.model.model.shape[0:2] == (2, 2)

def test_output_length(config):
    """Test that the correct number of outputs is generated for Hadoop_2k.json."""
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    result, dist = tc.execute()
    assert len(dist) == 2000

def test_output_values(config):
    """Test that the training-set distances for Hadoop_2k.json sum to at most 2000 (average <= 1)."""
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    result, dist = tc.execute()
    assert sum(dist) <= 2000

def test_end2endtraining(config):
    """Test anomaly detection training on public dataset."""
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainCommand(node_map=2, model_adapter=model_adapter)
    result, dist = tc.execute()
    assert result == 0

def __init__(self, config, feedback_strategy=None):
    """Set up the storage and model adapters and the task queue used to run training and inference."""
    if feedback_strategy is None:
        feedback_strategy = FeedbackStrategy(config=config)
    storage_adapter = SomStorageAdapter(config, feedback_strategy)
    self.__model_adapter = SomModelAdapter(storage_adapter)
    self.mgr = TaskQueue()

def test_log_similarity(cnf_hadoop2k_w2v_params):
    """Check that two log messages have consistent most-similar neighbours after training."""
    storage_adapter = SomStorageAdapter(config=cnf_hadoop2k_w2v_params, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    result, dist = tc.execute()
    log_1 = 'INFOmainorgapachehadoopmapreducevappMRAppMasterExecutingwithtokens'
    answer_1 = 'INFOmainorgapachehadoopmapreducevappMRAppMasterCreatedMRAppMasterforapplicationappattempt'
    match_1 = [
        model_adapter.w2v_model.model["message"].wv.most_similar(log_1)[i][0]
        for i in range(3)
    ]
    assert answer_1 in match_1
    log_2 = 'ERRORRMCommunicatorAllocatororgapachehadoopmapreducevapprmRMContainerAllocatorERRORINCONTACTINGRM'
    answer_2 = 'WARNLeaseRenewermsrabimsrasaorgapachehadoophdfsLeaseRenewerFailedtorenewleaseforDFSClient' \
               'NONMAPREDUCEforsecondsWillretryshortly'
    match_2 = [
        model_adapter.w2v_model.model["message"].wv.most_similar(log_2)[i][0]
        for i in range(3)
    ]
    logging.info(match_2[0])
    assert answer_2 in match_2

def test_vocab_length(config):
    """Check the length of the processed vocabulary on Hadoop_2k.json."""
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    result, dist = tc.execute()
    assert len(model_adapter.w2v_model.model["message"].wv.vocab) == 141

def test_loss_value(config):
    """Check that the Word2Vec training loss does not exceed the expected threshold."""
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    result, dist = tc.execute()
    tl = model_adapter.w2v_model.model["message"].get_latest_training_loss()
    print(tl)
    assert tl < 320000.0

class AnomalyDetectorFacade:
    """External interface for integrating different adapters for custom models and training logic."""

    def __init__(self, config):
        """Set up the storage and model adapters."""
        storage_adapter = SomStorageAdapter(config)
        self.__model_adapter = SomModelAdapter(storage_adapter)

    def run(self, single_run=False):
        """Abstraction around model adapter run method."""
        self.__model_adapter.run(single_run=single_run)

    def train(self, node_map=24, false_positives=None):
        """Abstraction around model adapter train method."""
        return self.__model_adapter.train(node_map, false_positives)

    def infer(self, false_positives=None):
        """Abstraction around model adapter inference method."""
        return self.__model_adapter.infer(false_positives)

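# Usage sketch (an assumption, not part of the source tree): one way the facade
# could be driven end to end. `Configuration` and the config_yaml path are taken
# from the unittest example further below; the exact import locations are not
# shown here and would need to match the project layout.
config = Configuration(config_yaml="config_files/.env_config.yaml")
facade = AnomalyDetectorFacade(config)
facade.train(node_map=2)   # train the SOM model on the configured data source
facade.infer()             # score incoming logs with the trained model
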
def get_score(config, node_map, feedback):
    """Simple utility function for injecting custom mock function into Detector."""
    feedback_strategy = FeedbackStrategy(config, fn=feedback)
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=feedback_strategy)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainCommand(node_map=node_map, model_adapter=model_adapter)
    success, dist = tc.execute()
    freq_one = dist[-1]
    return freq_one

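# Illustration only: a hypothetical mock feedback callable passed to get_score
# via the fn= hook of FeedbackStrategy above. The callable's signature and
# return value are assumptions; the real strategy may expect a different contract.
def mock_feedback(**kwargs):
    """Pretend the user reported no false positives."""
    return []


config = Configuration(config_yaml="config_files/.env_config.yaml")
score = get_score(config=config, node_map=2, feedback=mock_feedback)
assert score >= 0  # distances are expected to be non-negative
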
def test_train_command(cnf_hadoop_2k, pipeline):
    """Test that a training job added to the task queue runs to completion."""
    storage_adapter = SomStorageAdapter(config=cnf_hadoop_2k, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter)
    train_job = SomTrainJob(node_map=2, model_adapter=model_adapter)
    pipeline.add_steps(train_job)
    assert len(pipeline) == TASKS_IN_QUEUE
    assert pipeline.count != TASKS_IN_QUEUE
    pipeline.execute_steps()
    assert pipeline.count == TASKS_IN_QUEUE

def __init__(self, config, feedback_strategy=None, tracing_enabled=False):
    """Set up the required properties to run training or prediction.

    :param config: configuration provided via yaml or environment variables
    :param feedback_strategy: a function used to improve the system based on user feedback
    """
    if feedback_strategy is None:
        feedback_strategy = FeedbackStrategy(config=config)
    storage_adapter = SomStorageAdapter(config, feedback_strategy)
    self.__model_adapter = SomModelAdapter(storage_adapter)
    self.tasks = TaskQueue()
    self.tracing_enabled = tracing_enabled

def test_train_command(self):
    """Test that a training command added to the task queue runs to completion."""
    mgr = TaskQueue()
    config = Configuration(config_yaml="config_files/.env_config.yaml")
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter)
    tc = SomTrainCommand(node_map=2, model_adapter=model_adapter)
    mgr.add_steps(tc)
    self.assertEqual(len(mgr), TASKS_IN_QUEUE)
    self.assertNotEqual(mgr.count, TASKS_IN_QUEUE)
    mgr.execute_steps()
    self.assertEqual(mgr.count, TASKS_IN_QUEUE)
    mgr.clear()

def test_train_command(self):
    """Test that a training job added to the detector pipeline runs to completion."""
    mgr = DetectorPipeline()
    config = Configuration()
    config.STORAGE_DATASOURCE = "local"
    config.STORAGE_DATASINK = "stdout"
    config.LS_INPUT_PATH = "validation_data/Hadoop_2k.json"
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    mgr.add_steps(tc)
    self.assertEqual(len(mgr), TASKS_IN_QUEUE)
    self.assertNotEqual(mgr.count, TASKS_IN_QUEUE)
    mgr.execute_steps()
    self.assertEqual(mgr.count, TASKS_IN_QUEUE)
    mgr.clear()

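# Fixture sketch (hypothetical conftest.py): one way the `config` and `pipeline`
# fixtures consumed by the pytest-style tests above could be provided. The
# attribute values mirror the unittest variant directly above; the fixture names,
# scopes, and any analogous fixtures such as cnf_hadoop_2k are assumptions.
import pytest


@pytest.fixture
def config():
    cfg = Configuration()
    cfg.STORAGE_DATASOURCE = "local"
    cfg.STORAGE_DATASINK = "stdout"
    cfg.LS_INPUT_PATH = "validation_data/Hadoop_2k.json"
    return cfg


@pytest.fixture
def pipeline():
    return DetectorPipeline()
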
def __init__(self, config):
    """Set up the storage and model adapters."""
    storage_adapter = SomStorageAdapter(config)
    self.__model_adapter = SomModelAdapter(storage_adapter)