def test_F1_macro(self):
    """F1_macro() on the TEST_MATRIX fixture must match the reference value."""
    reference = 0.681087248
    tolerance = 10e-6  # note: 10e-6 == 1e-5

    # Inject the fixture counts directly instead of going through update().
    matrix = ConfusionMatrix(3)
    matrix.matrix = TEST_MATRIX

    deviation = abs(matrix.F1_macro() - reference)
    self.assertLess(deviation, tolerance)
def _mean_over_folds(folds, key):
    """Return the arithmetic mean of ``fold[key]`` over every fold result."""
    return sum(fold[key] for fold in folds) / len(folds)


def _train_fold(nnet, dataloader, epochs, verbose):
    """Train ``nnet`` for at most ``epochs`` passes over ``dataloader``.

    Each batch is run through ``forward``, scored with ``loss`` and then
    ``backprop`` is called. Training stops early once the mean epoch loss
    changes by less than ``EPSILON`` between two consecutive epochs.

    Returns:
        dict mapping the epoch index to ``{'loss': mean_batch_loss}``.

    Raises:
        ValueError: if the dataloader yields no batches (the mean loss
            would otherwise be a division by zero).
    """
    epoch_log = {}
    # Large initial value: there is no previous epoch to compare against yet.
    prev_loss = 99999999

    for epoch in range(epochs):
        # Accumulators are scoped to the epoch so nothing can leak between
        # epochs regardless of how the loop is exited.
        loss_sum = 0
        batch_count = 0
        for X, Y in dataloader:
            pred = nnet.forward(X)
            loss_sum += nnet.loss(pred, Y)
            batch_count += 1
            nnet.backprop(Y)

        if batch_count == 0:
            raise ValueError(
                'the data loader produced no batches; cannot train')

        loss = loss_sum / batch_count
        if verbose:
            print("Epoch: %d \t Loss: %f" % (epoch, loss))
        epoch_log[epoch] = {
            'loss': loss,
        }

        if abs(loss - prev_loss) < EPSILON:
            if verbose:
                print('stopped because of small loss gain')
            break
        prev_loss = loss

    return epoch_log


def _evaluate_fold(nnet, train, test, target_column, onehot, regression):
    """Run ``nnet`` on the test split and return the fold's metric dict."""
    X_test = test.drop(target_column, axis=1).values
    Y_test = test[[target_column]].values
    Y_pred = nnet.forward(X_test)

    if regression:
        return {
            'rmse': RegressionMetrics.rmse(Y_pred, Y_test),
            'mse': RegressionMetrics.mse(Y_pred, Y_test),
            'mean_error': RegressionMetrics.mean_error(Y_pred, Y_test),
        }

    # The matrix is sized from the largest label seen in the training split.
    cf_matrix = ConfusionMatrix(train[target_column].max() + 1)
    cf_matrix.update(Y_pred, Y_test, onehot)
    return {
        'cf_matrix': cf_matrix.matrix.tolist(),
        'accuracy': cf_matrix.accuracy(),
        'f1_macro': cf_matrix.F1_macro(),
    }


def run_experiment(dataset_file,
                   target_column,
                   specs,
                   onehot=False,
                   regression=False,
                   verbose=False,
                   check_gradient=False):
    """Train and evaluate one network configuration with k-fold validation.

    Args:
        dataset_file: CSV path handed to
            ``DatasetBuilder.read_dataset_from_csv_as_kfold``.
        target_column: name of the column holding the prediction target.
        specs: indexable configuration, read as
            ``specs[0]`` network layer specs (each item exposes ``['size']``),
            ``specs[1]`` learning rate, ``specs[2]`` batch size,
            ``specs[3]`` number of folds, ``specs[4]`` lambda
            (presumably the regularization factor -- confirm against
            ``NetworkBuilder``).
        onehot: forwarded to ``DataLoader`` and ``ConfusionMatrix.update``.
        regression: when true, regression metrics are computed instead of a
            confusion matrix.
        verbose: print the per-epoch loss and the early-stop notice.
        check_gradient: accepted for interface compatibility; not used here.

    Returns:
        dict with ``'specs'``, the per-fold results under ``'folds'`` and the
        fold-averaged metrics: ``'accuracy'`` and ``'f1_macro'`` for
        classification, or ``'rmse'``, ``'mse'`` and ``'mean_error'`` for
        regression.

    Raises:
        ValueError: if the dataset yields no folds, or a fold's data loader
            yields no batches.
    """
    network_specs, lr, batch_size, k_count, lambd = (
        specs[0], specs[1], specs[2], specs[3], specs[4])
    epochs = EPOCHS

    # Build the architecture label first: '%' binds tighter than '+', so
    # formatting and concatenating in one expression only works by accident.
    architecture = (
        "IN " + "-".join([str(x['size']) for x in network_specs]) + " OUT")

    print('========== Starting Experiment ===========')
    print('Learning Rate: \t\t %.5f' % lr)
    print('Batch Size: \t\t %d' % batch_size)
    print('K-Folds: \t\t %d' % k_count)
    print('Lambda: \t\t %.4f' % lambd)
    print('Network Architecture: \t %s' % architecture)

    kfolds = DatasetBuilder.read_dataset_from_csv_as_kfold(
        dataset_file, target_column, k_count)

    results = {'specs': specs, 'folds': []}
    for fold_idx, (train, test) in enumerate(kfolds.get_folds()):
        print('------ Starting Fold %d ----------' % (fold_idx + 1))
        fold_results = {'epochs': {}}
        results['folds'].append(fold_results)

        # A fresh network per fold so folds do not share learned weights.
        nnet = NetworkBuilder.build_network_from_specs(lambd, lr,
                                                       network_specs,
                                                       regression)
        dataloader = DataLoader(train.drop(target_column, axis=1).values,
                                train[[target_column]].values,
                                batch_size,
                                shuffle=True,
                                onehot=onehot)

        fold_results['epochs'] = _train_fold(nnet, dataloader, epochs,
                                             verbose)
        fold_results.update(
            _evaluate_fold(nnet, train, test, target_column, onehot,
                           regression))
        print(fold_results)

    folds = results['folds']
    if not folds:
        raise ValueError('the dataset produced no folds; nothing to average')

    # Summarise every scalar metric the folds recorded.
    if regression:
        summary_keys = ('rmse', 'mse', 'mean_error')
    else:
        summary_keys = ('accuracy', 'f1_macro')
    for key in summary_keys:
        results[key] = _mean_over_folds(folds, key)

    return results