def test_model_shape(config):
    """Verify the trained model's leading dimensions match the node map.

    A training job is run with ``node_map=2``; afterwards the first two
    entries of the trained model's ``shape`` must equal ``(2, 2)``.

    Args:
        config: Configuration object handed to the storage adapter
            (supplied by the test harness; its definition is not visible
            in this file section).
    """
    storage = SomStorageAdapter(config=config, feedback_strategy=None)
    adapter = SomModelAdapter(storage_adapter=storage)
    train_job = SomTrainJob(node_map=2, model_adapter=adapter)
    # The job's return values are irrelevant here; only the trained model
    # held by the adapter is inspected.
    _result, _dist = train_job.execute()

    trained_shape = adapter.model.model.shape
    expected_leading_dims = (2, 2)
    assert trained_shape[0:2] == expected_leading_dims
def test_output_length(config):
    """Verify that training yields one distance per input log line.

    The expected count of 2000 assumes ``config`` points at the
    Hadoop_2k.json data set (the fixture is not visible here -- confirm).

    Args:
        config: Configuration object handed to the storage adapter.
    """
    storage = SomStorageAdapter(config=config, feedback_strategy=None)
    adapter = SomModelAdapter(storage_adapter=storage)
    train_job = SomTrainJob(node_map=2, model_adapter=adapter)
    _result, distances = train_job.execute()

    expected_count = 2000
    assert len(distances) == expected_count
def test_output_values(config):
    """Test that every training-set distance is at most 1 on Hadoop_2k.json.

    The aggregate check ``sum(dist) <= 2000`` alone can pass even when
    individual distances exceed 1 (small values offset large ones), so each
    value is also checked on its own.

    Args:
        config: Configuration object handed to the storage adapter
            (presumably pointing at Hadoop_2k.json -- fixture not visible
            here).
    """
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    _result, dist = tc.execute()

    # Per-element bound: this is what the docstring actually promises.
    # Written as ``d <= 1`` (not ``not d > 1``) so NaN values fail the test.
    assert all(d <= 1 for d in dist)
    # Original aggregate bound, kept so the test is never weaker than before.
    assert sum(dist) <= 2000
def test_vocab_length(config):
    """Check the size of the processed "message" vocabulary.

    After training, the word-vector vocabulary stored under the
    ``"message"`` key must contain exactly 141 entries. The expected value
    assumes the Hadoop_2k.json data set (fixture not visible here --
    confirm).

    Args:
        config: Configuration object handed to the storage adapter.
    """
    storage = SomStorageAdapter(config=config, feedback_strategy=None)
    adapter = SomModelAdapter(storage_adapter=storage)
    train_job = SomTrainJob(node_map=2, model_adapter=adapter)
    _result, _dist = train_job.execute()

    message_model = adapter.w2v_model.model["message"]
    vocabulary = message_model.wv.vocab
    assert len(vocabulary) == 141
def test_loss_value(config):
    """Check that the latest training loss stays below the 320000.0 ceiling.

    The loss is read once from the ``"message"`` model after training,
    printed for diagnostics, and compared against the threshold.

    Args:
        config: Configuration object handed to the storage adapter.
    """
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    tc.execute()

    # Read the loss a single time so the printed and asserted values are
    # guaranteed to be the same number.
    tl = model_adapter.w2v_model.model["message"].get_latest_training_loss()
    print(tl)
    assert tl < 320000.0
def test_log_similarity(config):
    """Check that known log tokens keep their expected nearest neighbours.

    For two reference tokens, the expected related token must appear among
    the three most similar entries reported by the trained ``"message"``
    word vectors.

    Args:
        config: Configuration object handed to the storage adapter.
    """
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    tc.execute()

    word_vectors = model_adapter.w2v_model.model["message"].wv

    def top_three(token):
        """Return the three most similar tokens to *token*, best first."""
        # Query once and slice, instead of re-running the similarity search
        # for every index.
        return [entry[0] for entry in word_vectors.most_similar(token)[:3]]

    log_1 = 'INFOmainorgapachehadoopmapreducevappMRAppMasterExecutingwithtokens'
    answer_1 = 'INFOmainorgapachehadoopmapreducevappMRAppMasterCreatedMRAppMasterforapplicationappattempt'
    match_1 = top_three(log_1)
    assert answer_1 in match_1

    log_2 = 'ERRORRMCommunicatorAllocatororgapachehadoopmapreducevapprmRMContainerAllocatorERRORINCONTACTINGRM'
    answer_2 = (
        'WARNLeaseRenewermsrabimsrasaorgapachehadoophdfsLeaseRenewerFailedtorenewleaseforDFSClient'
        'NONMAPREDUCEforsecondsWillretryshortly'
    )
    match_2 = top_three(log_2)
    print(match_2[0])
    assert answer_2 in match_2
def test_train_command(self):
    """Validate that a training job added to the pipeline gets executed.

    A ``SomTrainJob`` reading ``validation_data/Hadoop_2k.json`` from local
    storage is added to a ``Pipeline``. Before execution the pipeline's
    length must equal ``TASKS_IN_QUEUE`` while its ``count`` must not; after
    ``execute_steps()`` the ``count`` must equal ``TASKS_IN_QUEUE``. The
    pipeline is cleared at the end.
    """
    pipeline = Pipeline()

    settings = Configuration()
    settings.STORAGE_DATASOURCE = "local"
    settings.STORAGE_DATASINK = "stdout"
    settings.LS_INPUT_PATH = "validation_data/Hadoop_2k.json"

    train_job = SomTrainJob(
        node_map=2,
        model_adapter=SomModelAdapter(
            SomStorageAdapter(config=settings, feedback_strategy=None)
        ),
    )
    pipeline.add_steps(train_job)

    # Queued but not yet run: length matches, count does not.
    self.assertEqual(len(pipeline), TASKS_IN_QUEUE)
    self.assertNotEqual(pipeline.count, TASKS_IN_QUEUE)

    pipeline.execute_steps()

    # After running, count has caught up with the queue size.
    self.assertEqual(pipeline.count, TASKS_IN_QUEUE)
    pipeline.clear()