Пример #1
0
	def test_F1_micro(self):
		"""Micro-averaged F1 on the shared fixture matrix is within 1e-5 of the reference value."""
		cm = ConfusionMatrix(3)
		cm.matrix = TEST_MATRIX
		want = 0.692307693

		got = cm.F1_micro()

		self.assertLess(abs(got - want), 10e-6)
Пример #2
0
	def test_TP(self):
		"""Per-class true positives match the fixture's diagonal values."""
		cf_matrix = ConfusionMatrix(3)
		cf_matrix.matrix = TEST_MATRIX
		expected_TP = np.array([10, 20, 15])

		actual_TP = cf_matrix._TP()

		# np.array_equal also verifies the shapes agree; the previous
		# np.equal(...).all() would broadcast (or raise) on a shape mismatch.
		self.assertTrue(np.array_equal(actual_TP, expected_TP))
Пример #3
0
	def test_accuracy(self):
		"""Overall accuracy on the shared fixture matrix is within 1e-5 of the reference value."""
		cm = ConfusionMatrix(3)
		cm.matrix = TEST_MATRIX
		want = 0.692307692

		got = cm.accuracy()

		self.assertLess(abs(got - want), 10e-6)
Пример #4
0
	def test_F1_score(self):
		"""Per-class F1 scores on the fixture matrix match the reference vector elementwise."""
		cm = ConfusionMatrix(3)
		cm.matrix = TEST_MATRIX
		want = np.array([0.588235288, 0.740740741, 0.714285715])

		got = cm.F1_score()

		self.assertTrue(np.isclose(got, want).all())
Пример #5
0
	def test_precision(self):
		"""Per-class precision on the fixture matrix matches the reference vector elementwise."""
		cm = ConfusionMatrix(3)
		cm.matrix = TEST_MATRIX
		want = np.array([0.6666667, 0.740740741, 0.652173913])

		got = cm.precision()

		self.assertTrue(np.isclose(got, want).all())
Пример #6
0
	def test_FN(self):
		"""Per-class false negatives match the fixture's row sums minus the diagonal."""
		cf_matrix = ConfusionMatrix(3)
		cf_matrix.matrix = TEST_MATRIX
		expected_FN = np.array([9, 7, 4])

		actual_FN = cf_matrix._FN()

		# np.array_equal also verifies the shapes agree; the previous
		# np.equal(...).all() would broadcast (or raise) on a shape mismatch.
		self.assertTrue(np.array_equal(actual_FN, expected_FN))
Пример #7
0
	def test_FP(self):
		"""Per-class false positives match the fixture's column sums minus the diagonal."""
		cf_matrix = ConfusionMatrix(3)
		cf_matrix.matrix = TEST_MATRIX
		expected_FP = np.array([5, 7, 8])

		actual_FP = cf_matrix._FP()

		# np.array_equal also verifies the shapes agree; the previous
		# np.equal(...).all() would broadcast (or raise) on a shape mismatch.
		self.assertTrue(np.array_equal(actual_FP, expected_FP))
Пример #8
0
	def test_recall(self):
		"""Per-class recall on the fixture matrix matches the reference vector elementwise."""
		cm = ConfusionMatrix(3)
		cm.matrix = TEST_MATRIX

		want = np.array([0.526315789, 0.740740741, 0.789473684])

		got = cm.recall()

		self.assertTrue(np.isclose(got, want).all())
Пример #9
0
def run_experiment(dataset_file,
                   target_column,
                   specs,
                   onehot=False,
                   regression=False,
                   verbose=False,
                   check_gradient=False):
    """Train and evaluate a network with k-fold cross-validation.

    Parameters
    ----------
    dataset_file : str
        Path to the CSV dataset read via DatasetBuilder.
    target_column : str
        Name of the label column in the dataset.
    specs : sequence
        (network_specs, learning_rate, batch_size, k_count, lambda).
    onehot : bool
        Whether targets are one-hot encoded for the DataLoader and the
        confusion-matrix update.
    regression : bool
        Evaluate with regression metrics (rmse/mse/mean_error) instead of
        a confusion matrix.
    verbose : bool
        Print per-epoch losses and the early-stopping notice.
    check_gradient : bool
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    dict
        {'specs': ..., 'folds': [per-fold dicts], plus cross-fold averages
        ('accuracy'/'f1_macro' or 'rmse')}.
    """
    # Unpack hyper-parameters positionally, matching the specs layout above.
    network_specs, lr, batch_size, k_count, lambd = specs[:5]
    epochs = EPOCHS

    print('========== Starting Experiment ===========')
    print('Learning Rate: \t\t %.5f' % lr)
    print('Batch Size: \t\t %d' % batch_size)
    print('K-Folds: \t\t %d' % k_count)
    print('Lambda: \t\t %.4f' % lambd)
    # BUGFIX: the original line had a "Netowrk" typo and relied on '%'
    # binding tighter than '+' ('%s' was filled with just "IN " and the
    # rest concatenated afterwards). Build the string explicitly instead.
    arch = 'IN ' + '-'.join(str(layer['size']) for layer in network_specs) + ' OUT'
    print('Network Architecture: \t %s' % arch)

    kfolds = DatasetBuilder.read_dataset_from_csv_as_kfold(
        dataset_file, target_column, k_count)

    results = {'specs': specs, 'folds': []}

    for fold_idx, (train, test) in enumerate(kfolds.get_folds()):
        print('------ Starting Fold %d ----------' % (fold_idx + 1))
        fold_results = {'epochs': {}}
        results['folds'].append(fold_results)

        nnet = NetworkBuilder.build_network_from_specs(lambd, lr,
                                                       network_specs,
                                                       regression)
        dataloader = DataLoader(train.drop(target_column, axis=1).values,
                                train[[target_column]].values,
                                batch_size,
                                shuffle=True,
                                onehot=onehot)

        prev_loss = float('inf')  # sentinel so the first epoch never early-stops
        loss = 0.0
        loss_count = 0
        for epoch in range(epochs):
            for X, Y in dataloader:
                pred = nnet.forward(X)
                loss += nnet.loss(pred, Y)
                loss_count += 1
                nnet.backprop(Y)

            loss /= loss_count  # average loss over this epoch's batches
            # BUGFIX: epoch losses were only stored when verbose=True,
            # leaving quiet runs with empty 'epochs' records. Record
            # unconditionally; print only when verbose.
            fold_results['epochs'][epoch] = {'loss': loss}
            if verbose:
                print("Epoch: %d \t Loss: %f" % (epoch, loss))

            # Early stopping once the loss plateaus.
            if abs(loss - prev_loss) < EPSILON:
                if verbose:
                    print('stopped because of small loss gain')
                break
            prev_loss = loss
            loss = 0.0
            loss_count = 0

        X_test = test.drop(target_column, axis=1).values
        Y_test = test[[target_column]].values
        Y_pred = nnet.forward(X_test)

        if regression:
            fold_results['rmse'] = RegressionMetrics.rmse(Y_pred, Y_test)
            fold_results['mse'] = RegressionMetrics.mse(Y_pred, Y_test)
            fold_results['mean_error'] = RegressionMetrics.mean_error(
                Y_pred, Y_test)
            print(fold_results)
        else:
            # Class count taken as max label + 1 — assumes labels are
            # 0..max in the training split; TODO confirm with DatasetBuilder.
            cf_matrix = ConfusionMatrix(train[target_column].max() + 1)
            cf_matrix.update(Y_pred, Y_test, onehot)

            fold_results['cf_matrix'] = cf_matrix.matrix.tolist()
            fold_results['accuracy'] = cf_matrix.accuracy()
            fold_results['f1_macro'] = cf_matrix.F1_macro()

    # Cross-fold averages of the headline metrics.
    n_folds = len(results['folds'])
    if regression:
        results['rmse'] = sum(f['rmse'] for f in results['folds']) / n_folds
    else:
        results['accuracy'] = sum(
            f['accuracy'] for f in results['folds']) / n_folds
        results['f1_macro'] = sum(
            f['f1_macro'] for f in results['folds']) / n_folds

    return results