def test_classification_error(self):
    y = np.array([0, 1, 1, 0])
    ypred = np.array([0.1, 0.9, 0.4, 0.2])

    # default threshold of 0.5 misclassifies the third sample -> error 1/4
    self.assertTrue(
        abs(evaluation.classification_error(y, ypred) - 0.25) < 1e-12)

    # lowering the threshold to 0.3 makes all four predictions correct
    self.assertTrue(
        abs(evaluation.classification_error(y, ypred, thres=0.3) - 0.0) < 1e-12)

    # weighted error: the single misclassified sample carries weight 0.7
    weights = np.array([1.0, 0.8, 0.7, 0.6])
    self.assertTrue(
        abs(evaluation.classification_error(y, ypred, weights=weights) -
            (1.0 - (1.0 + 0.8 + 0.6) / weights.sum())) < 1e-12)
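# For context: a minimal sketch of what classification_error appears to compute,
# based only on the assertions above (thresholded predictions, optional per-sample
# weights). This is an illustration, not mozsci's actual implementation; details
# such as how ties at the threshold are handled may differ.
import numpy as np

def classification_error_sketch(y, ypred, thres=0.5, weights=None):
    """Weighted fraction of samples whose thresholded prediction differs from y."""
    y = np.asarray(y)
    yhat = (np.asarray(ypred) >= thres).astype(int)
    if weights is None:
        weights = np.ones(len(y))
    weights = np.asarray(weights, dtype=float)
    return np.sum(weights * (yhat != y)) / np.sum(weights)

# Reproduces the expectations above: error 0.25 at the default threshold,
# 0.0 at thres=0.3, and 0.7 / 3.1 for the weighted case.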
def accuracy_auc(y, ypred, weights=None):
    """Compute accuracy, AUC, precision, recall, and F1."""
    from mozsci.evaluation import (classification_error, auc_wmw_fast,
                                   precision_recall_f1)

    prf1 = precision_recall_f1(y, ypred, weights=weights)
    return {
        'accuracy': 1.0 - classification_error(y, ypred, weights=weights),
        'auc': auc_wmw_fast(y, ypred, weights=weights),
        'precision': prf1[0],
        'recall': prf1[1],
        'f1': prf1[2],
    }
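# auc_wmw_fast presumably computes the AUC via the Wilcoxon-Mann-Whitney statistic
# (the probability that a randomly chosen positive outranks a randomly chosen
# negative). A naive, unweighted, O(n_pos * n_neg) reference sketch for comparison;
# the mozsci version is assumed to be a faster, weighted equivalent.
import numpy as np

def auc_wmw_naive(y, ypred):
    """AUC as the fraction of (positive, negative) pairs ranked correctly; ties count half."""
    y = np.asarray(y)
    scores = np.asarray(ypred, dtype=float)
    pos = scores[y == 1]
    neg = scores[y == 0]
    diffs = pos[:, None] - neg[None, :]
    return (np.sum(diffs > 0) + 0.5 * np.sum(diffs == 0)) / diffs.size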
def test_model_driver(self):
    independents = [
        variables.Variable('x0', IdentityTransformer()),
        variables.Variable('x1', LogScaledTransformer())
    ]
    dependents = [variables.Variable('y', IdentityTransformer())]
    model_variables = variables.ModelVariables(independents, dependents)

    # make some test data
    N = int(1e5)
    data = np.zeros(
        N, dtype=[('x0', np.float64), ('x1', np.float64), ('y', np.int64)])
    np.random.seed(5)
    data['x0'] = np.random.rand(N)
    data['x1'] = np.random.normal(0.5, 2.0, N)
    data['y'] = 3 * data['x0'] - 2 * data['x1'] - 1.5 > 0.0
    # rescale x1
    data['x1'] = np.exp(data['x1'])

    # create driver and fit
    model = variables.ModelDriver(model_variables, LogisticRegression(C=1e5))

    # first try to fit with regular numpy arrays
    X = data.view(dtype=np.float64).reshape(-1, 3)[:, :2]
    y = data.view(dtype=np.int64).reshape(-1, 3)[:, 2].reshape(-1, 1)
    model.fit(X, y)
    ypred = model.predict(X)
    self.assertTrue(classification_error(y, ypred) < 0.002)

    # now try using __getitem__
    model.fit(data, data)
    ypred = model.predict(data)
    self.assertTrue(classification_error(data['y'], ypred) < 0.002)

    # serialization round trip should give identical predictions
    model_string = model.dumps()
    model_loaded = variables.ModelDriver.loads(model_string)
    self.assertTrue(np.allclose(
        model.predict(data, predict_prob=True),
        model_loaded.predict(data, predict_prob=True)))
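# The test exponentiates x1 after generating labels from its log-scale values, so
# LogScaledTransformer is presumably expected to map the feature back onto a roughly
# linear scale before fitting. A hypothetical sketch of such a transformer (the real
# mozsci class may center or scale differently):
import numpy as np

class LogScaleSketch(object):
    """Hypothetical log-then-standardize transformer: x -> (log(x) - mean) / std."""

    def fit(self, x):
        logx = np.log(x)
        self.mean_ = logx.mean()
        self.std_ = logx.std()
        return self

    def transform(self, x):
        return (np.log(x) - self.mean_) / self.std_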
def test_fit(self):
    from mozsci.evaluation import classification_error

    # noise-free, linearly separable data: label is 1 when 3*x0 - 2*x1 - 1.5 > 0
    np.random.seed(5)
    N = int(1e5)
    x = np.random.rand(N, 2)
    y = (3 * x[:, 0] - 2 * x[:, 1] - 1.5 > 0.0).astype(int)

    lr = LogisticRegression()
    lr.fit(x, y, factr=1e4)
    ypred = lr.predict(x)
    self.assertTrue(classification_error(y, ypred) < 0.002)
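# The same synthetic problem can be sanity-checked against scikit-learn's
# LogisticRegression. This is not mozsci's class; it is only an independent reference
# under the assumption that both fit a regularized logistic model. Since the labels
# follow a noise-free linear rule, the training error should be very small either way.
import numpy as np
from sklearn.linear_model import LogisticRegression as SkLogisticRegression

np.random.seed(5)
N = int(1e5)
x = np.random.rand(N, 2)
y = (3 * x[:, 0] - 2 * x[:, 1] - 1.5 > 0.0).astype(int)

clf = SkLogisticRegression(C=1e5, max_iter=1000).fit(x, y)  # weak regularization
print(np.mean(clf.predict(x) != y))  # expected to be well below 1%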
def test_map_train_model(self):
    trainer = TrainModelCV(
        [LogisticRegression, classification_error, '/tmp/logistic.json',
         (), {'lam': 0.5}],
        X=self.X, y=self.y)
    errors = trainer.run()

    # load model
    trained_model = LogisticRegression.load_model('/tmp/logistic.json')
    loaded_model_error = classification_error(
        self.y, trained_model.predict(self.X))

    # check the errors; wrap dict.keys() in list() so indexing works on Python 3
    model_key = list(errors.keys())[0]
    self.assertTrue(np.abs(errors[model_key]['train'] - 0.06) < 1e-12)
    self.assertTrue(
        np.abs(errors[model_key]['train'] - loaded_model_error) < 1e-12)
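# From the call above, the model specification appears to be
# (model class, error function, output file, constructor args, constructor kwargs).
# A hypothetical sketch of the per-spec work TrainModelCV presumably does; the
# names (save_model) and the returned dict shape are assumptions, not mozsci's
# actual internals.
def run_one_spec_sketch(spec, X, y):
    model_cls, error_fn, outfile, args, kwargs = spec
    model = model_cls(*args, **kwargs)   # e.g. LogisticRegression(lam=0.5)
    model.fit(X, y)
    model.save_model(outfile)            # assumed counterpart of load_model
    return {'train': error_fn(y, model.predict(X))}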
def agg_err(yactual, ypred):
    """Aggregate evaluation metrics for a set of predictions."""
    from mozsci.evaluation import classification_error, auc_wmw_fast

    ret = {}
    # classification_error returns the error rate, so accuracy is its complement
    ret['accuracy'] = 1.0 - classification_error(yactual, ypred)
    ret['auc'] = auc_wmw_fast(yactual, ypred)
    return ret
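# Minimal usage sketch for agg_err; the printed values depend on mozsci's threshold
# and tie conventions, so none are asserted here.
import numpy as np

y = np.array([0, 1, 1, 0])
scores = np.array([0.1, 0.9, 0.4, 0.2])
print(agg_err(y, scores))  # {'accuracy': ..., 'auc': ...}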