def compute_mma_results(jam_file, annotators, trim, bins=250, gt=False):
    """Compute the Mean Measure Agreement for all the algorithms of the given
    file jam_file.

    Parameters
    ----------
    jam_file: str
        Jam file containing all the annotations for a given track.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.  The first key is assumed to be the
        ground-truth annotator ("GT").
    trim: boolean
        Whether to trim the first and last boundary or not.
    bins: int
        The number of bins to compute for the Information Gain.
    gt: boolean
        Whether to include the ground-truth annotator in the pairwise
        comparisons (default: exclude it).

    Returns
    -------
    results_mma: np.array
        All the results for all the different comparisons between algorithms.
        In order to obtain the average, simply take the mean across axis=0.
    """
    context = "large_scale"
    results_mma = []

    # list() keeps the slice working on Python 3, where dict views are not
    # subscriptable (on Python 2 keys() already returned a list).
    keys = list(annotators.keys())
    if not gt:
        # Drop the ground-truth annotator (assumed to be the first key).
        keys = keys[1:]

    for names in itertools.combinations(keys, 2):
        # Read estimated times from both annotators of this pair.
        if names[0] == "GT":
            # Ground truth is stored under a dataset-specific context,
            # derived from the file-name prefix.
            ds_name = os.path.basename(jam_file).split("_")[0]
            ann_context = MSAF.prefix_dict[ds_name]
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator=0, context=ann_context)
        else:
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=names[0],
                context=context)
        est_inters2, est_labels2 = jams2.converters.load_jams_range(
            jam_file, "sections", annotator_name=names[1], context=context)

        # Skip pairs where either annotation is missing.  len() is safer
        # than "== []": it also works if the loader returns numpy arrays.
        if len(est_inters1) == 0 or len(est_inters2) == 0:
            continue

        # Compute and accumulate the pairwise comparison results.
        results = EV.compute_results(est_inters1, est_inters2, trim, bins,
                                     jam_file)
        results_mma.append(results)

    return np.asarray(results_mma)
def compute_mma_results(jam_file, annotators, trim, bins=250, gt=False):
    """Compute the Mean Measure Agreement for all the algorithms of the given
    file jam_file.

    Parameters
    ----------
    jam_file: str
        Jam file containing all the annotations for a given track.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.  The first key is assumed to be the
        ground-truth annotator ("GT").
    trim: boolean
        Whether to trim the first and last boundary or not.
    bins: int
        The number of bins to compute for the Information Gain.
    gt: boolean
        If True, the ground-truth annotator takes part in the pairwise
        comparisons; otherwise it is excluded.

    Returns
    -------
    results_mma: np.array
        All the results for all the different comparisons between algorithms.
        In order to obtain the average, simply take the mean across axis=0.
    """
    context = "large_scale"
    results_mma = []

    # Materialize the keys so slicing works on both Python 2 and Python 3
    # (dict views cannot be sliced on Python 3).
    annotator_keys = list(annotators.keys())
    if not gt:
        # The first key is the ground truth; skip it unless requested.
        annotator_keys = annotator_keys[1:]

    for first, second in itertools.combinations(annotator_keys, 2):
        # Load the section annotations for each member of the pair.
        if first == "GT":
            # The ground truth uses a dataset-specific context keyed by
            # the file-name prefix.
            ds_name = os.path.basename(jam_file).split("_")[0]
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator=0,
                context=MSAF.prefix_dict[ds_name])
        else:
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=first, context=context)
        est_inters2, est_labels2 = jams2.converters.load_jams_range(
            jam_file, "sections", annotator_name=second, context=context)

        # A missing annotation makes this pair non-comparable; len() is
        # robust even if the loader hands back numpy arrays.
        if len(est_inters1) == 0 or len(est_inters2) == 0:
            continue

        # Evaluate this pair and collect the result row.
        results_mma.append(
            EV.compute_results(est_inters1, est_inters2, trim, bins,
                               jam_file))

    return np.asarray(results_mma)
def compute_mgp(jams_files, annotators, trim):
    """Computes the Mean Ground-truth Performance of the experiment results.

    For all the jams files of the experiment, it compares the results with
    the ground truth (also contained in the jam file), and computes the Mean
    Ground-truth Performance across them.

    Parameters
    ----------
    jams_files: list
        List containing all the file paths to the experiment files.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.
    trim: boolean
        Whether to trim the first and last boundaries.

    Returns
    -------
    mgp_results: np.array
        Array containing the results as in the eval module of MSAF for the
        humans performance.
    """
    mgp_results = np.empty((0, 9))
    est_context = "large_scale"
    bins = 250

    # Hoist the keys once; indexing a dict view fails on Python 3.
    # NOTE(review): assumes the first key is the ground-truth annotator,
    # which relies on dict ordering -- verify against the caller.
    keys = list(annotators.keys())

    for i in range(1, len(keys)):
        FPR = np.empty((0, 9))
        for jam_file in jams_files:
            # The ground-truth context is dataset specific, keyed by the
            # file-name prefix.
            ds_name = os.path.basename(jam_file).split("_")[0]
            ann_context = MSAF.prefix_dict[ds_name]
            ann_times, ann_labels = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=keys[0],
                context=ann_context)
            try:
                est_times, est_labels = jams2.converters.load_jams_range(
                    jam_file, "sections", annotator_name=keys[i],
                    context=est_context)
            except Exception:
                # Narrowed from a bare except: only skip files whose
                # annotations cannot be read, never swallow SystemExit or
                # KeyboardInterrupt.
                logging.warning("Couldn't read annotator %d in JAMS %s" %
                                (i, jam_file))
                continue
            if len(ann_times) == 0:
                logging.warning("No GT annotations in file %s" % jam_file)
                continue
            if len(est_times) == 0:
                logging.warning("No annotation in file %s for annotator %s."
                                % (jam_file, keys[i]))
                continue

            ann_times = np.asarray(ann_times)
            est_times = np.asarray(est_times)
            results = EV.compute_results(ann_times, est_times, trim, bins,
                                         jam_file)
            FPR = np.vstack((FPR, tuple(results)))

        if i == 1:
            # First annotator seeds the accumulator.
            mgp_results = np.vstack((mgp_results, FPR))
        else:
            # Compare shapes directly: probing ndim via
            # np.asarray([mgp_results, FPR]) raises a ValueError on modern
            # NumPy when the arrays are ragged, and Python-2 print
            # statements are replaced by logging for diagnostics.
            if mgp_results.shape != FPR.shape:
                logging.warning(
                    "Shape mismatch between accumulated results %s and "
                    "current annotator %s; skipping annotator %s" %
                    (mgp_results.shape, FPR.shape, keys[i]))
                continue
            # Running element-wise mean across annotators.
            mgp_results = np.mean([mgp_results, FPR], axis=0)

        # Per-annotator averages for the log (nan if no file was evaluated).
        FPR = np.mean(FPR, axis=0)
        logging.info("Results for %s:\n\tF3: %.4f, P3: %.4f, R3: %.4f\n"
                     "\tF05: %.4f, P05: %.4f, R05: %.4f" % (
                         keys[i], FPR[2], FPR[0], FPR[1],
                         FPR[5], FPR[3], FPR[4]))

    return mgp_results
def compute_mgp(jams_files, annotators, trim):
    """Computes the Mean Ground-truth Performance of the experiment results.

    For all the jams files of the experiment, it compares the results with
    the ground truth (also contained in the jam file), and computes the Mean
    Ground-truth Performance across them.

    Parameters
    ----------
    jams_files: list
        List containing all the file paths to the experiment files.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.
    trim: boolean
        Whether to trim the first and last boundaries.

    Returns
    -------
    mgp_results: np.array
        Array containing the results as in the eval module of MSAF for the
        humans performance.
    """
    mgp_results = np.empty((0, 9))
    est_context = "large_scale"
    bins = 250

    # Fetch the annotator names once; Python 3 dict views do not support
    # indexing, so a concrete list is required.
    # NOTE(review): the first key is treated as the ground truth, which
    # depends on dict ordering -- confirm with the construction site.
    names = list(annotators.keys())

    for idx in range(1, len(names)):
        per_file_scores = np.empty((0, 9))
        for jam_file in jams_files:
            # Dataset-specific ground-truth context from the file prefix.
            ds_name = os.path.basename(jam_file).split("_")[0]
            ann_context = MSAF.prefix_dict[ds_name]
            ann_times, ann_labels = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=names[0],
                context=ann_context)
            try:
                est_times, est_labels = jams2.converters.load_jams_range(
                    jam_file, "sections", annotator_name=names[idx],
                    context=est_context)
            except Exception:
                # Was a bare except; narrowed so that SystemExit and
                # KeyboardInterrupt still propagate.
                logging.warning("Couldn't read annotator %d in JAMS %s" %
                                (idx, jam_file))
                continue
            if len(ann_times) == 0:
                logging.warning("No GT annotations in file %s" % jam_file)
                continue
            if len(est_times) == 0:
                logging.warning("No annotation in file %s for annotator %s."
                                % (jam_file, names[idx]))
                continue

            ann_times = np.asarray(ann_times)
            est_times = np.asarray(est_times)
            results = EV.compute_results(ann_times, est_times, trim, bins,
                                         jam_file)
            per_file_scores = np.vstack((per_file_scores, tuple(results)))

        if idx == 1:
            # Seed the accumulator with the first annotator's rows.
            mgp_results = np.vstack((mgp_results, per_file_scores))
        else:
            # The original probed np.asarray([a, b]).ndim, which raises on
            # ragged input with modern NumPy; a direct shape comparison
            # detects the mismatch safely.  The Python-2 print statements
            # used for diagnostics became logging calls.
            if mgp_results.shape != per_file_scores.shape:
                logging.warning(
                    "Shape mismatch between accumulated results %s and "
                    "current annotator %s; skipping annotator %s" %
                    (mgp_results.shape, per_file_scores.shape, names[idx]))
                continue
            # Element-wise running mean across annotators.
            mgp_results = np.mean([mgp_results, per_file_scores], axis=0)

        # Average this annotator's rows for the summary log (nan when no
        # file produced a result).
        per_file_scores = np.mean(per_file_scores, axis=0)
        logging.info("Results for %s:\n\tF3: %.4f, P3: %.4f, R3: %.4f\n"
                     "\tF05: %.4f, P05: %.4f, R05: %.4f" %
                     (names[idx], per_file_scores[2], per_file_scores[0],
                      per_file_scores[1], per_file_scores[5],
                      per_file_scores[3], per_file_scores[4]))

    return mgp_results