def run(self, args):
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    # Seed both numpy's and the stdlib's RNGs (with distinct seeds) for reproducibility
    if args.random_seed is not None:
        np.random.seed(args.random_seed)
        random.seed(args.random_seed + 1)

    two_tailed = args.num_tails == 2
    # e.g., --confidence 95 gives alpha = 0.05
    alpha = 1.0 - args.confidence / 100

    results = run_hypothesis_tests(args.metrics_jsonl_files,
                                   args.dependent_metric,
                                   args.metric_A,
                                   args.metric_B,
                                   args.summarizer_type,
                                   test_method=args.hypothesis_test,
                                   alpha=alpha,
                                   two_tailed=two_tailed,
                                   skip_summary_level=args.skip_summary_level,
                                   skip_system_level=args.skip_system_level,
                                   skip_global=args.skip_global)

    if args.output_file:
        dirname = os.path.dirname(args.output_file)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(args.output_file, 'w') as out:
            out.write(json.dumps(results, indent=2))

    if not args.silent:
        logger.info(json.dumps(results, indent=2))
def run(self, args):
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    metric1, metric2 = args.metrics
    two_tailed = args.num_tails == 2
    alpha = 1.0 - args.confidence / 100
    # Extra keyword arguments for the confidence interval method arrive as a JSON string
    ci_kwargs = json.loads(args.confidence_interval_kwargs)

    results = compute_correlation(args.metrics_jsonl_files,
                                  metric1,
                                  metric2,
                                  args.summarizer_type,
                                  skip_summary_level=args.skip_summary_level,
                                  skip_system_level=args.skip_system_level,
                                  skip_global=args.skip_global,
                                  system_level_output_plot=args.system_level_output_plot,
                                  global_output_plot=args.global_output_plot,
                                  ci_method=args.confidence_interval_method,
                                  alpha=alpha,
                                  two_tailed=two_tailed,
                                  ci_kwargs=ci_kwargs)

    if args.output_file:
        dirname = os.path.dirname(args.output_file)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(args.output_file, 'w') as out:
            out.write(json.dumps(results, indent=2))

    if not args.silent:
        logger.info(json.dumps(results, indent=2))
def run_score(self, args: argparse.Namespace) -> None:
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    dataset_reader = get_dataset_reader_from_argument(args.dataset_reader)
    metric = get_metric_from_arguments(self.metric_type, args)
    input_files = args.input_files

    instances = dataset_reader.read(*input_files)
    metrics_dicts = score_instances(instances, [metric])
    save_score_results(metrics_dicts, args.output_jsonl, args.silent)
def run_evaluate(self, args: argparse.Namespace) -> None:
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    dataset_reader = get_dataset_reader_from_argument(args.dataset_reader)
    metric = get_metric_from_arguments(self.metric_type, args)
    input_files = args.input_files

    instances = dataset_reader.read(*input_files)
    macro, micro_list = evaluate_instances(instances, [metric])
    save_evaluation_results(macro, micro_list, args.macro_output_json, args.micro_output_jsonl, args.silent)
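# For illustration, assuming argparse exposes the arguments above under the usual
# dashed flag names, `run_evaluate` would be reached through an invocation like the
# following (the metric name and file names here are hypothetical examples):
#
#   sacrerouge rouge evaluate \
#       --input-files summaries.jsonl \
#       --dataset-reader reference-based \
#       --macro-output-json macro.json \
#       --micro-output-jsonl micro.jsonl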
def run(self, args):
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    results = run_all_partial_conjunction_pvalue_test(args.method,
                                                      args.pvalue_json_files,
                                                      args.names,
                                                      alpha=args.alpha)

    if args.output_file:
        dirname = os.path.dirname(args.output_file)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(args.output_file, 'w') as out:
            out.write(json.dumps(results, indent=2))

    if not args.silent:
        logger.info(json.dumps(results, indent=2))
def run(self, args):
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    # Import sacrerouge and any user-specified packages so that their components
    # are registered before the config file is parsed
    import_module_and_submodules('sacrerouge')
    include_packages = args.include_packages or []
    for package in include_packages:
        import_module_and_submodules(package)

    params = Params.from_file(args.config, args.overrides)
    dataset_reader = DatasetReader.from_params(params.pop('dataset_reader'))
    metrics = _load_metrics(params)

    input_files = params.pop('input_files')
    if isinstance(input_files, str):
        input_files = [input_files]

    instances = dataset_reader.read(*input_files)
    metrics_dicts = score_instances(instances, metrics, args.disable_peer_jackknifing)
    save_score_results(metrics_dicts, args.output_jsonl, args.silent)
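# For reference, a config accepted by the `run` method above would minimally contain
# the keys popped from `params` ("dataset_reader", "input_files") plus whatever
# `_load_metrics` consumes. A sketch, assuming `_load_metrics` pops a "metrics" list
# and that the type names shown here exist in the registry:
#
# {
#     "dataset_reader": {"type": "reference-based"},
#     "metrics": [{"type": "rouge"}],
#     "input_files": ["summaries.jsonl"]
# }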
def run(self, args):
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    # Use context managers so the input files are closed after loading
    with open(args.summary_level_correlations_A, 'r') as f:
        correlations_A = json.load(f)
    with open(args.summary_level_correlations_B, 'r') as f:
        correlations_B = json.load(f)
    results = run_wilcoxon_tests(correlations_A, correlations_B, alternative=args.alternative)

    if args.output_file:
        dirname = os.path.dirname(args.output_file)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(args.output_file, 'w') as out:
            out.write(json.dumps(results, indent=2))

    if not args.silent:
        logger.info(json.dumps(results, indent=2))
def run(self, args):
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    import_module_and_submodules('sacrerouge')
    include_packages = args.include_packages or []
    for package in include_packages:
        import_module_and_submodules(package)

    params = Params.from_file(args.config, args.overrides)
    dataset_reader = DatasetReader.from_params(params.pop('dataset_reader'))
    metrics = load_metrics(params)

    input_files = params.pop('input_files')
    if isinstance(input_files, str):
        input_files = [input_files]

    instances = dataset_reader.read(*input_files)
    macro, micro_list = evaluate_instances(instances, metrics)
    save_evaluation_results(macro, micro_list, args.macro_output_json, args.micro_output_jsonl, args.silent)
def run(self, args):
    prepare_global_logging(file_path=args.log_file, silent=args.silent)

    metric1, metric2 = args.metrics
    return_all_summary_level = args.summary_level_correlations_output is not None

    results = compute_correlation(args.metrics_jsonl_files,
                                  metric1,
                                  metric2,
                                  args.summarizer_type,
                                  return_all_summary_level=return_all_summary_level,
                                  skip_summary_level=args.skip_summary_level,
                                  skip_system_level=args.skip_system_level,
                                  skip_global=args.skip_global,
                                  system_level_output_plot=args.system_level_output_plot,
                                  global_output_plot=args.global_output_plot)

    # Separate the per-summary correlations from the aggregated results
    if return_all_summary_level:
        results, all_summary_level = results

    if args.output_file:
        dirname = os.path.dirname(args.output_file)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(args.output_file, 'w') as out:
            out.write(json.dumps(results, indent=2))

    if not args.silent:
        logger.info(json.dumps(results, indent=2))

    # Save the individual summary-level correlations. `all_summary_level` is only
    # defined when `return_all_summary_level` is True
    if return_all_summary_level:
        with open(args.summary_level_correlations_output, 'w') as out:
            out.write(json.dumps(all_summary_level, indent=2))
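# The "write results to a file, then optionally log them" block above is duplicated
# verbatim across several of these commands. A minimal sketch of a shared helper that
# could replace it; `_write_results` is a hypothetical name, not part of the existing
# codebase, and it relies on the module's existing `json`/`os`/`logger` imports:
def _write_results(results, output_file, silent):
    # Serialize once so the file and the log contain identical output
    serialized = json.dumps(results, indent=2)
    if output_file:
        dirname = os.path.dirname(output_file)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(output_file, 'w') as out:
            out.write(serialized)
    if not silent:
        logger.info(serialized)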