def test_ilg_basic(graph_db, ilg_test_dir):
    """Inspect the ``basic.txt`` ILG fixture and load it into the 'basic_ilg' corpus."""
    ilg_file = os.path.join(ilg_test_dir, 'basic.txt')
    ilg_parser = inspect_ilg(ilg_file)

    with CorpusContext('basic_ilg', **graph_db) as corpus:
        # reset() runs before the load, presumably to clear data left by earlier runs.
        corpus.reset()
        corpus.load(ilg_parser, ilg_file)
def loading(config, corpus_dir, textgrid_format):
    """Import the corpus at ``corpus_dir`` unless it is already loaded.

    The inspector used to build the parser is chosen from
    ``textgrid_format``.  Matching is case-insensitive, and any value that
    is not recognised falls back to the MFA inspector.

    Parameters
    ----------
    config
        Passed unchanged to ``CorpusContext``.
    corpus_dir : str
        Path of the corpus to import.  If it does not exist, a message is
        printed and the process exits with status 1.
    textgrid_format : str
        One of ``"buckeye"``, ``"csv"``, ``"fave"``, ``"ilg"``,
        ``"labbcat"``, ``"partitur"`` or ``"timit"``; anything else selects
        the MFA inspector.
    """
    # First check whether the corpus has already been imported.
    with CorpusContext(config) as c:
        exists = c.exists()
    if exists:
        print('Corpus already loaded, skipping import.')
        return

    if not os.path.exists(corpus_dir):
        print('The path {} does not exist.'.format(corpus_dir))
        sys.exit(1)

    # Normalise the case once so that every format name is matched the same
    # way.  Previously only "fave" ignored case, so e.g. "Buckeye" silently
    # fell through to the MFA inspector.
    fmt = textgrid_format.lower()

    with CorpusContext(config) as c:
        print('loading')
        if fmt == "buckeye":
            parser = pgio.inspect_buckeye(corpus_dir)
        elif fmt == "csv":
            # NOTE(review): "csv" is routed to the Buckeye inspector; this
            # looks like a copy-paste of the branch above. Confirm whether a
            # dedicated CSV inspector was intended before changing it.
            parser = pgio.inspect_buckeye(corpus_dir)
        elif fmt == "fave":
            parser = pgio.inspect_fave(corpus_dir)
        elif fmt == "ilg":
            parser = pgio.inspect_ilg(corpus_dir)
        elif fmt == "labbcat":
            parser = pgio.inspect_labbcat(corpus_dir)
        elif fmt == "partitur":
            parser = pgio.inspect_partitur(corpus_dir)
        elif fmt == "timit":
            parser = pgio.inspect_timit(corpus_dir)
        else:
            # Unrecognised formats default to MFA.
            parser = pgio.inspect_mfa(corpus_dir)
        c.load(parser, corpus_dir)
def loading(config, corpus_dir, textgrid_format):
    """Import the corpus at ``corpus_dir`` unless it is already loaded, and time it.

    The inspector used to build the parser is chosen from
    ``textgrid_format``.  Matching is case-insensitive, and any value that
    is not recognised falls back to the MFA inspector.  The wall-clock
    duration of the load is printed and passed to
    ``save_performance_benchmark`` under the ``'import'`` label.

    Parameters
    ----------
    config
        Passed unchanged to ``CorpusContext`` and
        ``save_performance_benchmark``.
    corpus_dir : str
        Path of the corpus to import.  If it does not exist, a message is
        printed and the process exits with status 1.
    textgrid_format : str
        One of ``"buckeye"``, ``"csv"``, ``"fave"``, ``"ilg"``,
        ``"labbcat"``, ``"partitur"`` or ``"timit"``; anything else selects
        the MFA inspector.
    """
    # First check whether the corpus has already been imported.
    with CorpusContext(config) as c:
        exists = c.exists()
    if exists:
        print('Corpus already loaded, skipping import.')
        return

    if not os.path.exists(corpus_dir):
        print('The path {} does not exist.'.format(corpus_dir))
        sys.exit(1)

    # Normalise the case once so that every format name is matched the same
    # way.  Previously only "fave" ignored case, so e.g. "Buckeye" silently
    # fell through to the MFA inspector.
    fmt = textgrid_format.lower()

    with CorpusContext(config) as c:
        print('loading')
        if fmt == "buckeye":
            parser = pgio.inspect_buckeye(corpus_dir)
        elif fmt == "csv":
            # NOTE(review): "csv" is routed to the Buckeye inspector; this
            # looks like a copy-paste of the branch above. Confirm whether a
            # dedicated CSV inspector was intended before changing it.
            parser = pgio.inspect_buckeye(corpus_dir)
        elif fmt == "fave":
            parser = pgio.inspect_fave(corpus_dir)
        elif fmt == "ilg":
            parser = pgio.inspect_ilg(corpus_dir)
        elif fmt == "labbcat":
            parser = pgio.inspect_labbcat(corpus_dir)
        elif fmt == "partitur":
            parser = pgio.inspect_partitur(corpus_dir)
        elif fmt == "timit":
            parser = pgio.inspect_timit(corpus_dir)
        else:
            # Unrecognised formats default to MFA.
            parser = pgio.inspect_mfa(corpus_dir)

        # Attach the module-level callback to the parser before loading.
        parser.call_back = call_back

        # Time only the load itself, not the inspection above.
        beg = time.time()
        c.load(parser, corpus_dir)
        end = time.time()
        time_taken = end - beg
        print('Loading took: {}'.format(time_taken))

    save_performance_benchmark(config, 'import', time_taken)
def test_ilg_mismatched(graph_db, ilg_test_dir):
    """Loading ``mismatched.txt`` must raise ``ILGWordMismatchError``."""
    bad_file = os.path.join(ilg_test_dir, 'mismatched.txt')
    good_file = os.path.join(ilg_test_dir, 'basic.txt')

    # The parser is built from the basic file and then applied to the
    # mismatched one.
    ilg_parser = inspect_ilg(good_file)

    with CorpusContext('mismatch', **graph_db) as corpus:
        corpus.reset()
        with pytest.raises(ILGWordMismatchError):
            corpus.load(ilg_parser, bad_file)
def loading(config, corpus_dir, textgrid_format):
    """Load the corpus at ``corpus_dir`` unless it is already loaded, and time it.

    The inspector used to build the parser is chosen from
    ``textgrid_format``, which is upper-cased before matching.  Any value
    that is not recognised falls back to the MFA inspector.  The wall-clock
    duration of the load is printed and passed to
    ``save_performance_benchmark`` under the ``'import'`` label.

    Parameters
    ----------
    config
        Passed unchanged to ``CorpusContext`` and
        ``save_performance_benchmark``.
    corpus_dir : str
        Path of the corpus to import.  If it does not exist, a message is
        printed and the process exits with status 1.
    textgrid_format : str
        Format name or single-letter alias (case-insensitive):
        ``BUCKEYE``/``B``, ``CSV``, ``FAVE``/``F``, ``ILG``,
        ``LABBCAT``/``L``, ``PARTITUR``/``P``, ``MAUS``/``W``,
        ``TIMIT``/``T``; anything else selects the MFA inspector.
    """
    ## first check if a database for the corpus
    ## has already been created
    with CorpusContext(config) as c:
        exists = c.exists()
    if exists:
        print('Corpus already loaded, skipping import.')
        return

    if not os.path.exists(corpus_dir):
        print('The path {} does not exist.'.format(corpus_dir))
        sys.exit(1)

    ## if there is no database file,
    ## begin with importing the corpus
    textgrid_format = textgrid_format.upper()

    with CorpusContext(config) as c:
        print('loading')

        ## Use the appropriate importer based
        ## on the format of the corpus
        if textgrid_format in ["BUCKEYE", "B"]:
            parser = pgio.inspect_buckeye(corpus_dir)
        elif textgrid_format == "CSV":
            # NOTE(review): "CSV" is routed to the Buckeye inspector; this
            # looks like a copy-paste of the branch above. Confirm whether a
            # dedicated CSV inspector was intended before changing it.
            parser = pgio.inspect_buckeye(corpus_dir)
        elif textgrid_format in ["FAVE", "F"]:
            # Compare the upper-cased value directly.  The earlier
            # `.lower() in ["FAVE", "F"]` test could never match, so FAVE
            # corpora were silently handed to the MFA inspector.
            parser = pgio.inspect_fave(corpus_dir)
        elif textgrid_format == "ILG":
            parser = pgio.inspect_ilg(corpus_dir)
        elif textgrid_format in ["LABBCAT", "L"]:
            parser = pgio.inspect_labbcat(corpus_dir)
        elif textgrid_format in ["P", "PARTITUR"]:
            parser = pgio.inspect_partitur(corpus_dir)
        elif textgrid_format in ["MAUS", "W"]:
            # A second, unreachable MAUS branch further down the chain was
            # removed; this one already covers both spellings.
            parser = pgio.inspect_maus(corpus_dir)
        elif textgrid_format in ["TIMIT", "T"]:
            parser = pgio.inspect_timit(corpus_dir)
        else:
            # Unrecognised formats default to MFA.
            parser = pgio.inspect_mfa(corpus_dir)

        # Attach the module-level callback to the parser before loading.
        parser.call_back = call_back

        # Time only the load itself, not the inspection above.
        beg = time.time()
        c.load(parser, corpus_dir)
        end = time.time()
        time_taken = end - beg
        print('Loading took: {}'.format(time_taken))

    save_performance_benchmark(config, 'import', time_taken)
def test_inspect_ilg_directory(ilg_test_dir):
    """Inspecting the ILG fixture directory yields a parser with two annotation types."""
    ilg_parser = inspect_ilg(ilg_test_dir)
    type_count = len(ilg_parser.annotation_types)
    assert type_count == 2
def test_inspect_ilg(ilg_test_dir):
    """Inspecting ``basic.txt`` yields two annotation types; the second uses '.' as its transcription delimiter."""
    ilg_file = os.path.join(ilg_test_dir, 'basic.txt')
    ilg_parser = inspect_ilg(ilg_file)

    type_count = len(ilg_parser.annotation_types)
    assert type_count == 2

    second_type = ilg_parser.annotation_types[1]
    assert second_type.trans_delimiter == '.'
def test_inspect_ilg_directory(ilg_test_dir):
    """Inspecting the ILG fixture directory yields a parser with two annotation tiers."""
    ilg_parser = inspect_ilg(ilg_test_dir)
    tier_count = len(ilg_parser.annotation_tiers)
    assert tier_count == 2
def test_inspect_ilg(ilg_test_dir):
    """Inspecting ``basic.txt`` yields two annotation tiers; the second uses '.' as its transcription delimiter."""
    ilg_file = os.path.join(ilg_test_dir, 'basic.txt')
    ilg_parser = inspect_ilg(ilg_file)

    tier_count = len(ilg_parser.annotation_tiers)
    assert tier_count == 2

    second_tier = ilg_parser.annotation_tiers[1]
    assert second_tier.trans_delimiter == '.'