def __init__(self, queries=None, articles=None): """ Creates a new tester instance and loads necessary data :return: None """ self.dataSource = TestDataSource(queries=queries, articles=articles) self.query_articles = self.dataSource.get_query_articles() self.results = self.dataSource.get_validation_results() self.article_ids = self.dataSource.get_articles() self.query_ids = self.dataSource.get_queries() self.algorithms = self.dataSource.get_algorithms() self.results_by_algorithm = self.dataSource.separate_algorithm_data() self.alg_testers = [] for algorithm in self.algorithms: algorithm_id = algorithm["id"] algorithm_name = algorithm["algorithm"] self.alg_testers.append(AlgorithmTester(algorithm_id, algorithm_name, tester_datasource=self.dataSource))
def __init__(self, algorithm_id, algorithm_name, tester_datasource=TestDataSource()): """ Creates a test environment for a given algorithm :param algorithm_id id of algorithm of interest :param algorithm_name name of algorithm of interest :param tester_datasource: tester datasouce instance to help us get data, default new :return: """ self.algorithm_id = algorithm_id self.algorithm_name = algorithm_name self.dataSource = tester_datasource self.query_articles = self.dataSource.get_query_articles() self.results = self.dataSource.get_validation_results() self.article_ids = self.dataSource.get_articles() self.query_ids = self.dataSource.get_queries() self.algorithm_results = tester_datasource.get_results_by_algorithms( self.algorithm_id) self.get_best_threshold_for_algorithm() #controls for variation in "randomness" random.seed(10)
class Tester: def __init__(self, queries=None, articles=None): """ Creates a new tester instance and loads necessary data :return: None """ self.dataSource = TestDataSource(queries=queries, articles=articles) self.query_articles = self.dataSource.get_query_articles() self.results = self.dataSource.get_validation_results() self.article_ids = self.dataSource.get_articles() self.query_ids = self.dataSource.get_queries() self.algorithms = self.dataSource.get_algorithms() self.results_by_algorithm = self.dataSource.separate_algorithm_data() self.alg_testers = [] for algorithm in self.algorithms: algorithm_id = algorithm["id"] algorithm_name = algorithm["algorithm"] self.alg_testers.append(AlgorithmTester(algorithm_id, algorithm_name, tester_datasource=self.dataSource)) def test_all(self): """ Runs test on all algorithms and plots results :return: None """ labels = [] Y_vals = [] X_vals = [] for alg_tester in self.alg_testers: alg_tester.get_best_threshold_for_algorithm() X, Y, f1 = alg_tester.test() X_vals.append(X) Y_vals.append(Y) labels.append(alg_tester.algorithm_name) x_label = "Best Threshold Found" y_label = "F1 Measure" title = "F1 Measure with different thresholds for different algorithms" self.plot_threshold_and_results_multi_algorithm(X_vals, labels, Y_vals, x_label, y_label, title) def bootstrap_all(self): """ Bootstraps data for every algorithm :return: """ for alg_tester in self.alg_testers: alg_tester.bootstrap() @staticmethod def plot_threshold_and_results_multi_algorithm(x_vals, data_labels, y_vals, x_label, y_label, title): """ Plots threshold and F1 data for multiple algorithms :param x_vals: the X values of the points to plot :param data_labels: the labels for the legend :param y_vals: the Y values of the points to plot :param x_label: the X axis label :param y_label: the Y axis label :param title: the title for the plot :return: None """ colors = ["red", "blue", "green", "orange", "purple", "pink", "yellow"] color_index = 0 key_legends = [] for y_i, y in enumerate(y_vals): plot.scatter(x_vals[y_i], y, color=colors[color_index]) # If labels for key legend exist if data_labels is not None: legend = mpatches.Patch(color=colors[color_index], label=data_labels[y_i]) key_legends.append(legend) color_index = (color_index + 1) % len(colors) # Only display key legend when we want to if data_labels is not None: plot.legend(handles=key_legends) plot.title(title) plot.xlabel(x_label) plot.ylabel(y_label) plot.show() def get_best_thresholds_for_all(self): for alg_tester in self.alg_testers: threshold = alg_tester.get_best_threshold_for_algorithm() print("Best threshold for {}: {}".format(alg_tester.algorithm_name, threshold)) def perform_hypothesis_tests_for_all(self, graph=False): for alg_tester in self.alg_testers: alg_tester.hypothesis_test(graph)