def get_penalties(model,
                  labeling_model,
                  typos: bool,
                  token_errors: float,
                  two_pass: bool = False) -> Tuple[float, float]:
    # Without a second pass and with an infinite token error rate, no penalties are applied.
    if (not two_pass) and token_errors == INF:
        return 0, 0
    # Look up the penalties fitted on the matching benchmark (10% typo noise if typos are enabled).
    benchmark_name = get_benchmark_name(noise_level=0.1 if typos else 0, p=token_errors)
    penalty_name = get_penalty_name(model, labeling_model)
    holder = PenaltyHolder(seq_acc=True)
    penalties = holder.get(penalty_name, benchmark_name)
    return penalties
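# Illustrative usage, not part of the original module. The return type suggests a pair of
# penalties (presumably insertion and deletion); `model` and `labeling_model` stand for
# whatever identifiers get_penalty_name expects, so the call below is only a sketch:
#
#   insertion_penalty, deletion_penalty = get_penalties(model, labeling_model,
#                                                       typos=True, token_errors=0.1)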
def load_labeling_corrector(robust: bool,
                            typos: bool,
                            p: float,
                            model: Optional[BidirectionalLabelingEstimator] = None) -> LabelingCorrector:
    if model is None:
        model = load_bidirectional_model(robust)
    model_name = model.specification.name
    # Retrieve the insertion and deletion thresholds fitted for this model on the matching benchmark.
    holder = ThresholdHolder(FittingMethod.LABELING)
    threshold_benchmark_name = get_benchmark_name(0.1 if typos else 0, p)
    insertion_threshold, deletion_threshold = holder.get_thresholds(model_name,
                                                                    noise_type=threshold_benchmark_name)
    corrector = LabelingCorrector(model_name,
                                  insertion_threshold,
                                  deletion_threshold,
                                  model=model)
    return corrector
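# Illustrative usage, not part of the original module: load a corrector for the robust
# bidirectional labeling model, with thresholds fitted on the 10%-typo benchmark at token
# error rate p = 0.1. The literal argument values are placeholders.
if __name__ == "__main__":
    example_corrector = load_labeling_corrector(robust=True, typos=True, p=0.1)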
def get_two_pass_benchmark(noise_level: float, p: float, subset: Subset, file_name: str):
    name = get_benchmark_name(noise_level, p)
    return TwoPassBenchmark(name, file_name, subset)
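# Illustrative usage, not part of the original module: build the two-pass benchmark for the
# test split of the 10%-typo benchmark at token error rate 0.1. The prediction file name is
# a hypothetical placeholder.
if __name__ == "__main__":
    example_benchmark = get_two_pass_benchmark(0.1, 0.1, Subset.TEST, "two_pass_predictions.txt")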
import numpy as np

from src.evaluation.results_holder import ResultsHolder, Metric
from src.benchmark.benchmark import get_benchmark_name, get_error_probabilities, Subset


NOISE_LEVELS = [0, 0.1, 0.2]

APPROACHES = ["combined",
              "combined_robust",
              "softmax",
              "softmax_robust",
              "sigmoid",
              "sigmoid_robust",
              "beam_search",
              "beam_search_robust",
              "bicontext",
              "dp_fixer",
              "dynamic_bi",
              "greedy",
              "enchant",
              "do_nothing"]

NAME_LEN = 21

METRICS = [Metric.F1, Metric.SEQUENCE_ACCURACY]


if __name__ == "__main__":
    holder = ResultsHolder()
    for approach in APPROACHES:
        # One LaTeX table row per approach: padded name column, then one cell per (noise level, metric).
        print_str = " "
        print_str += approach.replace('_', ' ')
        print_str += ' ' * (NAME_LEN - len(approach))
        for noise_level in NOISE_LEVELS:
            for metric in METRICS:
                values = [holder.get(get_benchmark_name(noise_level, p), Subset.TEST, approach, metric)
                          for p in get_error_probabilities()]
                # Report 0 if any benchmark result is missing, otherwise the mean over error probabilities.
                if 0 in values:
                    mean = 0
                else:
                    mean = float(np.mean(values))
                print_str += "& %.2f\\,\\%% " % (mean * 100)
        print_str += "\\\\"
        print(print_str)
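# The script above prints one LaTeX table row per approach, of the form (values shown here
# are illustrative placeholders, not actual results):
#   combined robust        & 97.12\,\% & 95.03\,\% ... \\
# with one "& <mean>\,\%" cell for every (noise level, metric) combination.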
]
getter = ParameterGetter(params)
getter.print_help()
parameters = getter.get()

import numpy as np

from src.evaluation.results_holder import ResultsHolder, Metric
from src.benchmark.benchmark import get_benchmark_name, get_error_probabilities, Subset


if __name__ == "__main__":
    approach = parameters["approach"]
    holder = ResultsHolder()

    metrics = [Metric.F1, Metric.SEQUENCE_ACCURACY, Metric.MEAN_RUNTIME]
    values = {metric: [] for metric in metrics}

    for p in get_error_probabilities():
        benchmark_name = get_benchmark_name(parameters["noise_level"], p)
        benchmark_values = []
        for metric in metrics:
            value = holder.get(benchmark_name, Subset.TEST, approach, metric)
            benchmark_values.append(value)
            values[metric].append(value)
        # Print one line per benchmark: name (padded to 7 characters) followed by the metric values.
        print_name = benchmark_name[:7]
        print_name += ' ' * (7 - len(print_name))
        print(print_name, ' '.join(str(value) for value in benchmark_values))

    # Summarize each metric over all error probabilities.
    for metric in metrics:
        metric_values = values[metric]
        print(metric,
              "mean = %.4f (min = %.4f, max = %.4f)" %
              (np.mean(metric_values), min(metric_values), max(metric_values)))
import sys

import project
from src.evaluation.evaluator import Evaluator
from src.benchmark.benchmark import Benchmark, Subset, BenchmarkFiles, get_benchmark_name, NOISE_LEVELS, \
    ERROR_PROBABILITIES
from src.evaluation.results_holder import ResultsHolder, Metric


if __name__ == "__main__":
    file_name = sys.argv[1]
    approach_name = sys.argv[2]

    results_holder = ResultsHolder()

    for noise_level in NOISE_LEVELS:
        for p in ERROR_PROBABILITIES:
            benchmark_name = get_benchmark_name(noise_level, p)
            benchmark_subset = Subset.TEST
            print(benchmark_name)

            benchmark = Benchmark(benchmark_name, benchmark_subset)
            sequence_pairs = benchmark.get_sequence_pairs(BenchmarkFiles.CORRUPT)

            if file_name == "corrupt.txt":
                # Evaluate the uncorrected (corrupt) sequences as a do-nothing baseline.
                predicted_sequences = benchmark.get_sequences(BenchmarkFiles.CORRUPT)
                mean_runtime = 0
            else:
                try:
                    predicted_sequences = benchmark.get_predicted_sequences(file_name)[:len(sequence_pairs)]
                    mean_runtime = benchmark.get_mean_runtime(file_name)
                except FileNotFoundError:
                    # No prediction file for this benchmark: record empty results.
                    predicted_sequences = []
                    mean_runtime = 0
]
getter = ParameterGetter(params)
getter.print_help()
parameters = getter.get()

import numpy as np

from src.evaluation.results_holder import ResultsHolder, Metric
from src.benchmark.benchmark import get_error_probabilities, get_benchmark_name, Subset


if __name__ == "__main__":
    noise_level = parameters["noise_level"]
    approach = parameters["approach"]

    holder = ResultsHolder()

    for metric in (Metric.F1, Metric.SEQUENCE_ACCURACY):
        approach_vals = []
        best_other_vals = []
        for p in get_error_probabilities():
            # Compare the given approach against the best competing approach on each benchmark.
            values = holder.results[get_benchmark_name(noise_level, p)][Subset.TEST]
            approach_vals.append(values[approach][metric])
            best_other_vals.append(max(values[other][metric] for other in values if other != approach))
        print(metric)
        approach_mean = np.mean(approach_vals)
        other_mean = np.mean(best_other_vals)
        print(approach_mean, approach_vals)
        print(other_mean, best_other_vals)
        print("diff = %.4f" % (approach_mean - other_mean))