def run(self, args):
    for filename in args.infile:
        with open(filename, 'rb') as handler:
            self.add(evaluation.Evaluation().load(handler))

    best_th, mean_f = self.get_best()
    print best_th
def run(self, args):
    e = evaluation.Evaluation()

    # Gathers recall, precision and f-measure from each evaluation file
    r = []
    p = []
    f = []
    for name in args.infile:
        with open(name, 'rb') as h:
            e = e.load(h)
        r.append(e.data.recall)
        p.append(e.data.precision)
        f.append(e.data.f)

    print "Recall: mean=%f, std_dev=%f" % (numpy.mean(r), numpy.std(r))
    print "Precision: mean=%f, std_dev=%f" % (numpy.mean(p), numpy.std(p))
    print "F-Measure: mean=%f, std_dev=%f" % (numpy.mean(f), numpy.std(f))
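# A minimal sketch of the aggregation above on hypothetical recall values
# (the numbers are illustrative only, not real results):
import numpy

r = [0.80, 0.85, 0.90]
print "Recall: mean=%f, std_dev=%f" % (numpy.mean(r), numpy.std(r))
# Note that numpy.std computes the population standard deviation (ddof=0)
# by default; pass ddof=1 if the sample standard deviation is desired.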
def run(self, args):
    e = evaluation.Evaluation()

    if not args.no_header:
        print "Info,N_estimated,N_reference,N_Correct,Recall,Precision,"\
              "F-Measure"

    for name in sorted(args.infile):
        with open(name, 'rb') as f:
            e = e.load(f)

        name = e.metadata.reference_input.name
        if args.short_filenames:
            # Keeps only the base filename, without directories or
            # extension
            name = name.split('/')[-1].split('.')[0]

        print "%s,%d,%d,%d,%.13f,%.13f,%.13f" % (
            name, e.data.n_estimated, e.data.n_reference, e.data.n_correct,
            e.data.recall, e.data.precision, e.data.f)
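# For reference, one row of the CSV report above, built from the
# illustrative counts n_estimated=90, n_reference=100, n_correct=80 and a
# hypothetical input named 'song01' (short filenames enabled), assuming
# the usual recall = correct/reference and precision = correct/estimated
# definitions:
#
#   Info,N_estimated,N_reference,N_Correct,Recall,Precision,F-Measure
#   song01,90,100,80,0.8000000000000,0.8888888888889,0.8421052631579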
def evaluate(self, identification, estimated, reference,
             frame_length=0.01, save_metadata=True):
    """Computes the evaluation based on estimated and reference scores.

    Args:
        identification: some form of identification that will be stored
            in metadata.
        estimated: estimated score.
        reference: reference score.
        frame_length: step size for time.
        save_metadata: flag indicating whether the metadata should be
            computed. Default: True.

    Returns:
        Evaluation object.
    """
    t_start_estimated, t_end_estimated = estimated.get_timespan()
    t_start_reference, t_end_reference = reference.get_timespan()

    correct = 0.
    total_estimated = 0.
    total_reference = 0.

    # Sanity check
    if t_end_estimated - t_start_estimated >= 0 and \
            t_end_reference - t_start_reference >= 0:
        # Starts at the first frame
        t = min(t_start_estimated, t_start_reference)

        # Ends with the minimum frame time
        t_end = min(t_end_estimated, t_end_reference)

        while t < t_end:
            # Gets notes active at the current time
            estimated_active_notes = estimated.get_active_notes(t)
            reference_active_notes = reference.get_active_notes(t)

            total_estimated += len(estimated_active_notes)
            total_reference += len(reference_active_notes)

            for e in estimated_active_notes:
                e_name = e.to_name()
                for r in reference_active_notes:
                    if e_name == r.to_name():
                        correct += 1

                        # As each reference note can match only a single
                        # estimation, we remove the matched reference
                        reference_active_notes.remove(r)

                        # Stops looking for references, as we got a match
                        break

            t += frame_length

    # Creates evaluation object with the description of the method
    e = evaluation.Evaluation(total_estimated, total_reference, correct)
    e.metadata.estimated = estimated.metadata
    e.metadata.reference = reference.metadata
    e.metadata.method = md.Metadata(name='mirex framewise',
                                    id=identification)
    if save_metadata:
        e.metadata.estimated_input = md.ObjectMetadata(estimated)
        e.metadata.reference_input = md.ObjectMetadata(reference)

    return e
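# The Evaluation class is not shown here. Below is a minimal sketch of
# how its recall, precision and f fields are presumably derived from the
# three counts passed above, assuming the standard MIREX-style
# definitions; framewise_metrics is a hypothetical helper, not part of
# the actual API:

def framewise_metrics(n_estimated, n_reference, n_correct):
    # Recall: fraction of reference note-frames that were found
    recall = float(n_correct) / n_reference if n_reference else 0.
    # Precision: fraction of estimated note-frames that are correct
    precision = float(n_correct) / n_estimated if n_estimated else 0.
    # F-measure: harmonic mean of precision and recall
    f = 2 * precision * recall / (precision + recall) \
        if (precision + recall) else 0.
    return recall, precision, f

print framewise_metrics(90, 100, 80)  # -> (0.8, 0.888..., 0.842...)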
def evaluate(self, identification, estimated, reference,
             onset_tolerance=0.05, duration_tolerance=-1,
             ignore_pitch=False, save_metadata=True):
    """Computes the evaluation based on estimated and reference scores.

    Args:
        identification: some form of identification that will be stored
            in metadata.
        estimated: estimated score.
        reference: reference score.
        onset_tolerance: additive tolerance for the onset to be valid.
        duration_tolerance: multiplicative tolerance for the duration to
            be valid. If negative, ignore duration restrictions.
        ignore_pitch: ignore notes' pitch when evaluating.
        save_metadata: flag indicating whether the metadata should be
            computed. Default: True.

    Returns:
        Evaluation object.
    """
    n_ref = len(reference.data)
    n_est = len(estimated.data)
    correct = 0

    # Don't use default comparison because:
    # 1) some crazy person may want to change it, and that could break
    #    this code
    # 2) we don't need to order offset and pitch
    estimated_data = sorted(estimated.data, key=lambda n: n.data.onset)
    reference_data = sorted(reference.data, key=lambda n: n.data.onset)

    negative_duration_tolerance = (duration_tolerance < 0)

    # Iterates estimated data to match the reference
    for e in estimated_data:
        e_onset = e.data.onset
        e_duration = e.data.duration
        e_name = e.to_name()

        # As the notes are ordered by onset, we can remove from the
        # reference every note whose onset is below the current lower
        # bound (the explicit index scan also handles an empty or
        # fully-consumed reference list)
        i = 0
        while i < len(reference_data) and \
                reference_data[i].data.onset < e_onset - onset_tolerance:
            i += 1
        reference_data = reference_data[i:]

        for r in reference_data:
            # Checks if onset is above range. If so, we can stop the
            # search because all other notes after it will also be above
            if r.data.onset > e_onset + onset_tolerance:
                break

            # Checks if notes match in duration and name if required
            if (negative_duration_tolerance or
                    (abs(e_duration - r.data.duration) <
                     max(r.data.duration * duration_tolerance,
                         onset_tolerance))) \
                    and (ignore_pitch or e_name == r.to_name()):
                correct += 1

                # As each reference note can match only a single
                # estimation, we remove the matched reference
                reference_data.remove(r)

                # Stops looking for references, as we got a match
                break

    # Creates evaluation object with the description of the method
    e = evaluation.Evaluation(n_est, n_ref, correct)
    e.metadata.estimated = estimated.metadata
    e.metadata.reference = reference.metadata
    e.metadata.method = md.Metadata(name='mirex symbolic',
                                    id=identification,
                                    duration_tolerance=duration_tolerance,
                                    onset_tolerance=onset_tolerance)
    if save_metadata:
        e.metadata.estimated_input = md.ObjectMetadata(estimated)
        e.metadata.reference_input = md.ObjectMetadata(reference)

    return e
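# A minimal sketch of the per-note matching criterion used above, with
# mock objects standing in for the real score classes (_Note, _Data and
# matches are hypothetical stand-ins, not the actual API):

class _Data(object):
    def __init__(self, onset, duration):
        self.onset = onset
        self.duration = duration

class _Note(object):
    def __init__(self, name, onset, duration):
        self.data = _Data(onset, duration)
        self._name = name

    def to_name(self):
        return self._name

def matches(e, r, onset_tolerance=0.05, duration_tolerance=-1,
            ignore_pitch=False):
    # Onset must fall within an additive window around the reference
    if abs(e.data.onset - r.data.onset) > onset_tolerance:
        return False
    # Duration must fall within a multiplicative window, unless disabled
    if duration_tolerance >= 0 and \
            abs(e.data.duration - r.data.duration) >= \
            max(r.data.duration * duration_tolerance, onset_tolerance):
        return False
    return ignore_pitch or e.to_name() == r.to_name()

# A note estimated 30 ms early still matches with the 50 ms tolerance;
# one estimated 100 ms late does not.
print matches(_Note('C4', 1.03, 0.5), _Note('C4', 1.0, 0.5))  # True
print matches(_Note('C4', 1.10, 0.5), _Note('C4', 1.0, 0.5))  # False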