def test_score():
    """Check ``score`` on the ``ref.rttm``/``sys.rttm`` fixtures in TEST_DIR.

    Loads the reference and system turns, scores them over a UEM covering
    ``FILE1`` from 0 to 43, and compares both the single per-file result and
    the overall result against stored expected values to 3 decimal places.
    """
    # Expected values for the fixture pair (some real data).
    expected = Scores(
        'FILE1', 26.39309, 33.24631, 0.71880, 0.72958, 0.72415, 0.60075,
        0.58534, 0.80471, 0.72543, 0.96810, 0.55872)

    ref_path = os.path.join(TEST_DIR, 'ref.rttm')
    ref_turns, _, _ = load_rttm(ref_path)
    sys_path = os.path.join(TEST_DIR, 'sys.rttm')
    sys_turns, _, _ = load_rttm(sys_path)
    uem = UEM({'FILE1': [(0, 43)]})

    per_file, overall = score(ref_turns, sys_turns, uem)

    # Exactly one file was scored; compare every field after the first
    # numerically and the file id exactly.
    assert len(per_file) == 1
    file_result = per_file[-1]
    assert file_result.file_id == expected.file_id
    assert_almost_equal(file_result[1:], expected[1:], 3)

    # With a single file, the overall result is expected to carry the same
    # numbers under the '*** OVERALL ***' file id.
    expected_overall = expected._replace(file_id='*** OVERALL ***')
    assert overall.file_id == expected_overall.file_id
    assert_almost_equal(overall[1:], expected_overall[1:], 3)
def _score_recordings(args):
    """Score one recording's system RTTM against its reference RTTM.

    Parameters
    ----------
    args : tuple
        ``(fid, ref_rttm_dir, sys_rttm_dir, collar, ignore_overlaps, step)``.
        The RTTM paths are ``<ref_rttm_dir>/<fid>.rttm`` and
        ``<sys_rttm_dir>/<fid>.rttm``. ``collar``, ``ignore_overlaps`` and
        ``step`` are forwarded to ``score`` in that order.

    Returns
    -------
    list or None
        ``[fid]`` extended with the values returned by ``score``, or
        ``None`` if either RTTM file does not exist (a warning is logged
        for each missing file).
    """
    fid, ref_rttm_dir, sys_rttm_dir, collar, ignore_overlaps, step = args
    ref_rttm_fn = os.path.join(ref_rttm_dir, fid + '.rttm')
    sys_rttm_fn = os.path.join(sys_rttm_dir, fid + '.rttm')

    # Check both files before bailing out so every missing file is reported.
    missing = False
    if not os.path.exists(ref_rttm_fn):
        logger.warning('Missing reference RTTM: %s. Skipping.', ref_rttm_fn)
        missing = True
    if not os.path.exists(sys_rttm_fn):
        logger.warning('Missing system RTTM: %s. Skipping.', sys_rttm_fn)
        missing = True
    if missing:
        return None

    row = [fid]
    # Pass the scoring options through; previously they were unpacked but
    # dropped, so score() always ran with its defaults.
    row.extend(score(ref_rttm_fn, sys_rttm_fn, collar, ignore_overlaps, step))
    return row
def main():
    """Score system diarization RTTMs against reference RTTMs.

    Parses the command line, loads reference and system speaker turns, loads
    the UEM (or generates one from the turns when none is given), trims and
    merges the turns, scores them, and prints the per-file and overall
    results.

    Exits with status 1 when invoked without arguments (after printing the
    help text) or when no reference or no system RTTMs were specified.
    """
    # Parse command line arguments.
    parser = ArgumentParser(
        description='Score diarization from RTTM files.', add_help=True,
        usage='%(prog)s [options]')
    parser.add_argument(
        '-r', nargs='+', default=[], metavar='STR', dest='ref_rttm_fns',
        action=RefRTTMAction,
        help='reference RTTM files (default: %(default)s)')
    parser.add_argument(
        '-R', nargs=None, metavar='STR', dest='ref_rttm_scpf',
        action=RefRTTMAction,
        help='reference RTTM script file (default: %(default)s)')
    parser.add_argument(
        '-s', nargs='+', default=[], metavar='STR', dest='sys_rttm_fns',
        action=SysRTTMAction,
        help='system RTTM files (default: %(default)s)')
    parser.add_argument(
        '-S', nargs=None, metavar='STR', dest='sys_rttm_scpf',
        action=SysRTTMAction,
        help='system RTTM script file (default: %(default)s)')
    # The short and long flags must be separate option strings; as a single
    # '-u,--uem' string the long form was never registered.
    parser.add_argument(
        '-u', '--uem', nargs=None, metavar='STR', dest='uemf',
        help='un-partitioned evaluation map file (default: %(default)s)')
    parser.add_argument(
        '--collar', nargs=None, default=0.25, type=float, metavar='FLOAT',
        help='collar size in seconds for DER computation '
             '(default: %(default)s)')
    parser.add_argument(
        '--ignore_overlaps', action='store_true', default=False,
        help='ignore overlaps when computing DER')
    # type=float so a value given on the command line is not passed to
    # score() as a string.
    parser.add_argument(
        '--jer_min_ref_dur', nargs=None, default=0.0, type=float,
        metavar='FLOAT',
        help='minimum reference speaker duration for JER '
             '(default: %(default)s)')
    parser.add_argument(
        '--step', nargs=None, default=0.010, type=float, metavar='FLOAT',
        help='step size in seconds (default: %(default)s)')
    parser.add_argument(
        '--n_digits', nargs=None, default=2, type=int, metavar='INT',
        help='number of decimal places to print (default: %(default)s)')
    parser.add_argument(
        '--version', action='version', version='%(prog)s ' + VERSION)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    # Check that at least one reference RTTM and at least one system RTTM
    # was specified. A script file, when given, replaces the file list.
    if args.ref_rttm_scpf is not None:
        args.ref_rttm_fns = load_script_file(args.ref_rttm_scpf)
    if args.sys_rttm_scpf is not None:
        args.sys_rttm_fns = load_script_file(args.sys_rttm_scpf)
    if not args.ref_rttm_fns:
        error('No reference RTTMs specified.')
        sys.exit(1)
    if not args.sys_rttm_fns:
        error('No system RTTMs specified.')
        sys.exit(1)

    # Load reference/system speaker turns and UEM. If no UEM specified,
    # determine it automatically.
    info('Loading speaker turns from reference RTTMs...', file=sys.stderr)
    ref_turns, _ = load_rttms(args.ref_rttm_fns)
    info('Loading speaker turns from system RTTMs...', file=sys.stderr)
    sys_turns, _ = load_rttms(args.sys_rttm_fns)
    if args.uemf is not None:
        info('Loading universal evaluation map...', file=sys.stderr)
        uem = load_uem(args.uemf)
    else:
        warn('No universal evaluation map specified. Approximating from '
             'reference and speaker turn extents...')
        uem = gen_uem(ref_turns, sys_turns)

    # Trim turns to UEM scoring regions and merge any that overlap.
    info('Trimming reference speaker turns to UEM scoring regions...',
         file=sys.stderr)
    ref_turns = trim_turns(ref_turns, uem)
    info('Trimming system speaker turns to UEM scoring regions...',
         file=sys.stderr)
    sys_turns = trim_turns(sys_turns, uem)
    info('Checking for overlapping reference speaker turns...',
         file=sys.stderr)
    ref_turns = merge_turns(ref_turns)
    info('Checking for overlapping system speaker turns...',
         file=sys.stderr)
    sys_turns = merge_turns(sys_turns)

    # Score.
    info('Scoring...', file=sys.stderr)
    check_for_empty_files(ref_turns, sys_turns, uem)
    file_scores, global_scores = score(
        ref_turns, sys_turns, uem, step=args.step,
        jer_min_ref_dur=args.jer_min_ref_dur, collar=args.collar,
        ignore_overlaps=args.ignore_overlaps)
    print_output(
        file_scores, global_scores, args.n_digits)
info('Loading speaker turns from system RTTMs...', file=sys.stderr) sys_turns, sys_file_ids = load_rttms(args.sys_rttm_fns) if args.uemf is not None: info('Loading universal evaluation map...', file=sys.stderr) uem = load_uem(args.uemf) else: warn('No universal evaluation map specified. Approximating from ' 'reference and speaker turn extents...') uem = gen_uem(ref_turns, sys_turns) # Trim turns to UEM scoring regions and merge any that overlap. info('Trimming reference speaker turns to UEM scoring regions...', file=sys.stderr) ref_turns = trim_turns(ref_turns, uem) info('Trimming system speaker turns to UEM scoring regions...', file=sys.stderr) sys_turns = trim_turns(sys_turns, uem) info('Checking for overlapping reference speaker turns...', file=sys.stderr) ref_turns = merge_turns(ref_turns) info('Checking for overlapping system speaker turns...', file=sys.stderr) sys_turns = merge_turns(sys_turns) # Score. check_for_empty_files(ref_turns, sys_turns, uem) file_to_scores, global_scores = score(ref_turns, sys_turns, uem, args.collar, args.ignore_overlaps, args.step) print_table(file_to_scores, global_scores, args.n_digits, args.table_format)
parser.add_argument('--score_overlaps', action='store_false', default=True, dest='ignore_overlaps', help='score overlaps when computing DER') parser.add_argument('--step', nargs=None, default=0.010, type=float, metavar='FLOAT', help='step size in seconds (Default: %(default)s)') parser.add_argument('--version', action='version', version='%(prog)s ' + VERSION) if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() metrics = score(args.ref_rttm, args.sys_rttm, args.collar, args.ignore_overlaps, args.step) logger.info('DER: %.2f' % metrics[0]) logger.info('B-cubed precision: %.2f' % metrics[1]) logger.info('B-cubed recall: %.2f' % metrics[2]) logger.info('B-cubed F1: %.2f' % metrics[3]) logger.info('GKT(ref, sys): %.2f' % metrics[4]) logger.info('GKT(sys, ref): %.2f' % metrics[5]) logger.info('H(ref|sys): %.2f' % metrics[6]) logger.info('MI: %.2f' % metrics[7]) logger.info('NMI: %.2f' % metrics[8])