def load_fragments_within(fname, verbose):
    """Load the within-fold fragments stored in `fname`.

    The actual reading is delegated to ``load_split(fname, multiple=True)``;
    it runs inside a ``verb_print`` context that receives `verbose`.

    Parameters
    ----------
    fname : path handed unchanged to ``load_split``.
    verbose : forwarded to ``verb_print``.

    Returns
    -------
    Whatever ``load_split`` returns for `fname` with ``multiple=True``.
    """
    progress = verb_print(' loading folds within', verbose, True, True, True)
    with progress:
        within_folds = load_split(fname, multiple=True)
    return within_folds
def _disc_error_sort_key(fragment):
    """Order fragments by file name, then by interval start."""
    return (fragment.name, fragment.interval.start)


def _print_disc_errors(fragments, kind, template, limit=100):
    """Print one line per fragment in `fragments`, at most `limit` lines.

    When more than `limit` fragments are given, a notice is printed and only
    the first `limit` (in the order received) are kept. The kept fragments
    are then printed sorted by (name, interval start).

    `kind` is the word inserted in the notice ('interval' or 'filename').
    `template` is formatted with (name, start, end); a template that only
    uses ``{0}`` simply ignores the two interval bounds.
    """
    if len(fragments) > limit:
        print('There were more than {0} {1} errors found.'.format(limit,
                                                                  kind))
        print('Printing only the first {0}.'.format(limit))
        # print('') emits a blank line whether print is the statement or
        # the function; a bare print() would show "()" as a statement.
        print('')
        fragments = fragments[:limit]
    for fragment in sorted(fragments, key=_disc_error_sort_key):
        print(template.format(fragment.name,
                              fragment.interval.start,
                              fragment.interval.end))


def _abort_on_disc_errors(fname):
    """Print the closing error message for `fname` and call sys.exit()."""
    print('There were errors in {0}. Use option -f to automatically skip '
          'invalid intervals.'.format(fname))
    # NOTE(review): sys.exit() without an argument exits with status 0 even
    # though errors were reported; kept as-is because callers may rely on it.
    sys.exit()


def load_disc(fname, corpus, split_file, truncate, verbose):
    """Load the discovered classes in `fname` and validate their intervals.

    Steps:
      1. ``load_split(split_file)`` builds the split mapping and
         ``_load_classes`` reads the classes, also returning a list of
         erroneous fragments.
      2. If `truncate` is true, ``truncate_intervals`` rewrites the classes
         and any errors (from either step) are ignored.
      3. Otherwise every error is printed (at most 100 per category) and the
         program exits through ``sys.exit()``.

    Parameters
    ----------
    fname : class file to load; also used in the error messages.
    corpus : passed to ``_load_classes`` and ``truncate_intervals``.
    split_file : passed to ``load_split``.
    truncate : when true, fix intervals instead of reporting and exiting.
    verbose : forwarded to every ``verb_print`` call.

    Returns
    -------
    The classes from ``_load_classes``, replaced by the first element of
    the ``truncate_intervals`` result when `truncate` is true.

    Raises
    ------
    SystemExit : when `truncate` is false and any error was found.
    """
    interval_template = ' error: {0} [{1:.3f}, {2:.3f}]'
    filename_template = ' error: {0}'

    with verb_print(' loading discovered classes', verbose, True, True, True):
        split_mapping = load_split(split_file)
        disc, errors = _load_classes(fname, corpus, split_mapping)
        if not truncate and len(errors) > 0:
            _print_disc_errors(errors, 'interval', interval_template)
            _abort_on_disc_errors(fname)

    if truncate:
        # The same verbosity arguments as every other verb_print call in
        # this function are passed here; the call used to receive only the
        # label, so this step did not follow the caller's `verbose` setting.
        with verb_print(' checking discovered classes and truncating',
                        verbose, True, True, True):
            # The two error lists are deliberately dropped in truncate mode.
            disc, _filename_errors, _interval_errors = \
                truncate_intervals(disc, corpus, split_mapping)
        return disc

    with verb_print(' checking discovered classes', verbose, True, True, True):
        filename_errors, interval_errors = \
            check_intervals(disc, split_mapping)

    # Sort before reporting so that, when more than 100 errors exist, the
    # ones kept are the first 100 in (name, start) order.
    filename_errors = sorted(filename_errors, key=_disc_error_sort_key)
    interval_errors = sorted(interval_errors, key=_disc_error_sort_key)

    if interval_errors:
        print(banner('intervals found in {0} outside of valid'
                     ' splits'.format(fname)))
        _print_disc_errors(interval_errors, 'interval', interval_template)
    if filename_errors:
        print(banner('unknown filenames found in {0}'.format(fname)))
        _print_disc_errors(filename_errors, 'filename', filename_template)
    if filename_errors or interval_errors:
        _abort_on_disc_errors(fname)
    return disc
def _disc_error_sort_key(fragment):
    """Order fragments by file name, then by interval start."""
    return (fragment.name, fragment.interval.start)


def _print_disc_errors(fragments, kind, template, limit=100):
    """Print one line per fragment in `fragments`, at most `limit` lines.

    When more than `limit` fragments are given, a notice is printed and only
    the first `limit` (in the order received) are kept. The kept fragments
    are then printed sorted by (name, interval start).

    `kind` is the word inserted in the notice ('interval' or 'filename').
    `template` is formatted with (name, start, end); a template that only
    uses ``{0}`` simply ignores the two interval bounds.
    """
    if len(fragments) > limit:
        print('There were more than {0} {1} errors found.'.format(limit,
                                                                  kind))
        print('Printing only the first {0}.'.format(limit))
        # print('') emits a blank line whether print is the statement or
        # the function; a bare print() would show "()" as a statement.
        print('')
        fragments = fragments[:limit]
    for fragment in sorted(fragments, key=_disc_error_sort_key):
        print(template.format(fragment.name,
                              fragment.interval.start,
                              fragment.interval.end))


def _abort_on_disc_errors(fname):
    """Print the closing error message for `fname` and call sys.exit()."""
    print('There were errors in {0}. Use option -f to automatically skip '
          'invalid intervals.'.format(fname))
    # NOTE(review): sys.exit() without an argument exits with status 0 even
    # though errors were reported; kept as-is because callers may rely on it.
    sys.exit()


def load_disc(fname, corpus, split_file, truncate, verbose):
    """Load the discovered classes in `fname` and validate their intervals.

    Steps:
      1. ``load_split(split_file)`` builds the split mapping and
         ``_load_classes`` reads the classes, also returning a list of
         erroneous fragments.
      2. If `truncate` is true, ``truncate_intervals`` rewrites the classes
         and any errors (from either step) are ignored.
      3. Otherwise every error is printed (at most 100 per category) and the
         program exits through ``sys.exit()``.

    Parameters
    ----------
    fname : class file to load; also used in the error messages.
    corpus : passed to ``_load_classes`` and ``truncate_intervals``.
    split_file : passed to ``load_split``.
    truncate : when true, fix intervals instead of reporting and exiting.
    verbose : forwarded to every ``verb_print`` call.

    Returns
    -------
    The classes from ``_load_classes``, replaced by the first element of
    the ``truncate_intervals`` result when `truncate` is true.

    Raises
    ------
    SystemExit : when `truncate` is false and any error was found.
    """
    interval_template = ' error: {0} [{1:.3f}, {2:.3f}]'
    filename_template = ' error: {0}'

    with verb_print(' loading discovered classes', verbose, True, True, True):
        split_mapping = load_split(split_file)
        disc, errors = _load_classes(fname, corpus, split_mapping)
        if not truncate and len(errors) > 0:
            _print_disc_errors(errors, 'interval', interval_template)
            _abort_on_disc_errors(fname)

    if truncate:
        # The same verbosity arguments as every other verb_print call in
        # this function are passed here; the call used to receive only the
        # label, so this step did not follow the caller's `verbose` setting.
        with verb_print(' checking discovered classes and truncating',
                        verbose, True, True, True):
            # The two error lists are deliberately dropped in truncate mode.
            disc, _filename_errors, _interval_errors = \
                truncate_intervals(disc, corpus, split_mapping)
        return disc

    with verb_print(' checking discovered classes', verbose, True, True, True):
        filename_errors, interval_errors = \
            check_intervals(disc, split_mapping)

    # Sort before reporting so that, when more than 100 errors exist, the
    # ones kept are the first 100 in (name, start) order.
    filename_errors = sorted(filename_errors, key=_disc_error_sort_key)
    interval_errors = sorted(interval_errors, key=_disc_error_sort_key)

    if interval_errors:
        print(banner('intervals found in {0} outside of valid'
                     ' splits'.format(fname)))
        _print_disc_errors(interval_errors, 'interval', interval_template)
    if filename_errors:
        print(banner('unknown filenames found in {0}'.format(fname)))
        _print_disc_errors(filename_errors, 'filename', filename_template)
    if filename_errors or interval_errors:
        _abort_on_disc_errors(fname)
    return disc
# load gold phones and gold words with verb_print(' loading word corpus file', verbose, True, True, True): wrd_corpus = load_corpus_txt(wrd_corpus_file) with verb_print(' loading phone corpus file', verbose, True, True, True): phn_corpus = load_corpus_txt(phn_corpus_file) # load across and withing folds with verb_print(' loading folds cross', verbose, True, True, True): #fragments_cross = load_split(folds_cross_file, # multiple=False) intervals_vad = [load_split(vad_file, multiple=False)] # get list of file names from vad: # names = load_names(vad_file) try: os.makedirs(dest) except OSError: pass # Before loading the class file, check its consistency. # If intervals that overlaps a little with silence are found, # they are trimmed. If intervals that contain silences are found, # an error is thrown and the evaluation breaks. sil_tree = create_silence_tree(vad_file) parse_class_file(disc_clsfile, sil_tree, dest, verbose) # load discovered intervals and gold intervals