import multiprocessing

import numpy


def perform_knn(features, labels, knn_num_neighbors, knn_weights,
                knn_algorithm, knn_metric, knn_imbalanced_data, test_size,
                num_test_trials):
  """Runs multiple trials of KNN in parallel with the selected features.

  The number of parallel processes is at most num_test_trials and never
  exceeds the number of available cores - 2.
  """
  finished_trials = 0
  metrics = list()
  while finished_trials < num_test_trials:
    num_processes = min(multiprocessing.cpu_count() - 2,
                        num_test_trials - finished_trials)
    print('Running ' + str(num_processes) + ' parallel processes')
    # Replicate the inputs, one copy per worker process.
    replicated_inputs = ((features, labels, knn_num_neighbors, knn_weights,
                          knn_algorithm, knn_metric, knn_imbalanced_data,
                          test_size),) * num_processes
    pool = multiprocessing.Pool(processes=num_processes)
    metrics.extend(pool.map(perform_single_knn, replicated_inputs))
    pool.close()
    finished_trials += num_processes

  # Take the average of each metric across all trials.
  mean_metrics = [numpy.mean(values) for values in zip(*metrics)]
  print("Mean Test Accuracy: %0.2f%%\n" % mean_metrics[5])
  return mean_metrics
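# The driver above maps `perform_single_knn` over the worker pool, but that
# function is not part of this excerpt. The sketch below is an assumption of
# what a single trial might look like, using scikit-learn's
# KNeighborsClassifier and train_test_split; it returns a tuple whose element
# at index 5 is the test accuracy (in percent) so that mean_metrics[5] above
# lines up. The real perform_single_knn almost certainly returns a richer set
# of metrics and handles knn_imbalanced_data, which is ignored here.
def perform_single_knn(args):
  from sklearn.metrics import accuracy_score
  from sklearn.model_selection import train_test_split
  from sklearn.neighbors import KNeighborsClassifier

  (features, labels, num_neighbors, weights, algorithm, metric,
   imbalanced_data, test_size) = args
  train_x, test_x, train_y, test_y = train_test_split(
      features, labels, test_size=test_size)
  model = KNeighborsClassifier(n_neighbors=num_neighbors, weights=weights,
                               algorithm=algorithm, metric=metric)
  model.fit(train_x, train_y)
  train_accuracy = 100.0 * accuracy_score(train_y, model.predict(train_x))
  test_accuracy = 100.0 * accuracy_score(test_y, model.predict(test_x))
  # Placeholder zeros keep the test accuracy at index 5, matching the
  # assumption made in perform_knn above.
  return (train_accuracy, 0.0, 0.0, 0.0, 0.0, test_accuracy)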
def perform_random_forest(features, labels, rf_num_trees, rf_criterion,
                          rf_max_features, rf_min_samples_split,
                          rf_min_samples_leaf, scikit_balancing, test_size,
                          num_test_trials):
  """Runs multiple trials of random forest in parallel with the selected features.

  The number of parallel processes is at most num_test_trials and never
  exceeds the number of available cores - 2.
  """
  finished_trials = 0
  metrics = list()
  while finished_trials < num_test_trials:
    num_processes = min(multiprocessing.cpu_count() - 2,
                        num_test_trials - finished_trials)
    print('Running ' + str(num_processes) + ' parallel processes')
    # Replicate the inputs, one copy per worker process.
    replicated_inputs = ((features, labels, rf_num_trees, rf_criterion,
                          rf_max_features, rf_min_samples_split,
                          rf_min_samples_leaf, scikit_balancing,
                          test_size),) * num_processes
    pool = multiprocessing.Pool(processes=num_processes)
    metrics.extend(pool.map(perform_single_random_forest, replicated_inputs))
    pool.close()
    finished_trials += num_processes

  # Take the average of each metric across all trials.
  mean_metrics = [numpy.mean(values) for values in zip(*metrics)]
  print("Mean Test Accuracy: %0.2f%%\n" % mean_metrics[5])
  return mean_metrics
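# Illustrative invocation of the two drivers above; it is not from the
# original source. The synthetic dataset (sklearn.datasets.make_classification)
# and every hyper-parameter value are assumptions chosen only to show the
# expected argument shapes. The __main__ guard matters because
# multiprocessing.Pool re-imports the module in worker processes on some
# platforms.
if __name__ == '__main__':
  from sklearn.datasets import make_classification

  features, labels = make_classification(n_samples=1000, n_features=20)
  perform_knn(features, labels, knn_num_neighbors=5, knn_weights='distance',
              knn_algorithm='auto', knn_metric='euclidean',
              knn_imbalanced_data=False, test_size=0.2, num_test_trials=8)
  perform_random_forest(features, labels, rf_num_trees=100,
                        rf_criterion='gini', rf_max_features='sqrt',
                        rf_min_samples_split=2, rf_min_samples_leaf=1,
                        scikit_balancing=False, test_size=0.2,
                        num_test_trials=8)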
def __init__(self, name: Text) -> None:
  """Initializes classification metrics: the defaults plus precision, recall and F1."""
  metrics = _build_default_metrics(binary=True)
  metrics.extend([
      Metric('precision', sklearn.metrics.precision_score, binary_only=True),
      Metric('recall', sklearn.metrics.recall_score, binary_only=True),
      Metric('f1', sklearn.metrics.f1_score, binary_only=True),
  ])
  super(ClassificationPerformanceMetrics, self).__init__(name, metrics=metrics)
def _build_default_metrics(binary: bool) -> List[Metric]:
  """Builds and returns the default set of `Metric`s."""
  metrics = [
      Metric('num', lambda y_true, y_pred: len(y_true), binary_only=binary)
  ]
  if binary:
    metrics.extend([
        Metric('auc', sklearn.metrics.roc_auc_score, binary_only=True),
        Metric('auprc', sklearn.metrics.average_precision_score,
               binary_only=True),
        TopPercentileMetric('freq', compute_frequency, binary_only=True,
                            top_percentile=100),
    ])
    for top_percentile in [10, 5, 1]:
      metrics.append(
          TopPercentileMetric('freq @{:04.1f}'.format(top_percentile),
                              compute_frequency, binary_only=True,
                              top_percentile=top_percentile))
  else:
    metrics.extend([
        Metric('pearson', sp.stats.pearsonr, binary_only=False),
        Metric('spearman', sp.stats.spearmanr, binary_only=False),
        Metric('mse', sklearn.metrics.mean_squared_error, binary_only=False),
        Metric('mae', sklearn.metrics.mean_absolute_error, binary_only=False),
    ])
  return metrics
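# `Metric` and `TopPercentileMetric` are constructed above but not defined in
# this excerpt. The sketch below is an assumed minimal shape consistent with
# the call sites: a name, a scoring callable taking (y_true, y_pred), a
# binary_only flag, and, for the percentile variant, the percentage of
# highest-scored examples to evaluate on. The real classes likely carry more
# behaviour (formatting, aggregation, validation) than shown here.
import dataclasses
from typing import Callable

import numpy


@dataclasses.dataclass
class Metric:
  name: str
  fn: Callable[..., float]
  binary_only: bool = False

  def compute(self, y_true, y_pred) -> float:
    return self.fn(y_true, y_pred)


@dataclasses.dataclass
class TopPercentileMetric(Metric):
  top_percentile: float = 100.0

  def compute(self, y_true, y_pred) -> float:
    # Keep only the top_percentile percent of examples ranked by predicted
    # score; this interpretation of top_percentile is an assumption.
    count = max(1, int(round(len(y_pred) * self.top_percentile / 100.0)))
    top_idx = numpy.argsort(y_pred)[::-1][:count]
    return self.fn(numpy.asarray(y_true)[top_idx],
                   numpy.asarray(y_pred)[top_idx])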