Пример #1
0
def test_ilg_basic(graph_db, ilg_test_dir):
    """Inspect ``basic.txt`` and load it into the ``basic_ilg`` corpus.

    The test has no explicit assertions: it passes when inspection and
    loading both complete without raising.
    """
    source_file = os.path.join(ilg_test_dir, 'basic.txt')
    ilg_parser = inspect_ilg(source_file)

    with CorpusContext('basic_ilg', **graph_db) as corpus:
        # Reset before loading (presumably clears earlier contents so the
        # test starts from a clean corpus -- confirm against CorpusContext).
        corpus.reset()
        corpus.load(ilg_parser, source_file)
Пример #2
0
def test_ilg_basic(graph_db, ilg_test_dir):
    """Smoke test: build a parser from ``basic.txt`` and load that file.

    Passing means neither ``inspect_ilg`` nor ``CorpusContext.load`` raised.
    """
    ilg_file = os.path.join(ilg_test_dir, 'basic.txt')
    file_parser = inspect_ilg(ilg_file)

    with CorpusContext('basic_ilg', **graph_db) as ctx:
        # The corpus is reset before the load is attempted.
        ctx.reset()
        ctx.load(file_parser, ilg_file)
Пример #3
0
def loading(config, corpus_dir, textgrid_format):
    """Import the corpus in ``corpus_dir`` unless it is already loaded.

    Prints a message and returns early when the corpus already exists.
    Prints a message and exits the process with status 1 when
    ``corpus_dir`` does not exist. Otherwise picks a ``pgio`` inspector
    from ``textgrid_format`` and loads the corpus with it.

    ``textgrid_format`` is matched exactly (case-sensitive), except for
    "fave", which is matched case-insensitively. Any unrecognised value
    falls back to the MFA inspector.
    """
    with CorpusContext(config) as probe:
        already_loaded = probe.exists()
    if already_loaded:
        print('Corpus already loaded, skipping import.')
        return
    if not os.path.exists(corpus_dir):
        print('The path {} does not exist.'.format(corpus_dir))
        sys.exit(1)

    # Format name -> name of the pgio inspector function. Names are resolved
    # lazily with getattr so only the selected inspector is looked up.
    # NOTE(review): "csv" is routed to the Buckeye inspector, exactly as in
    # the original if/elif chain -- confirm this is intended.
    inspector_names = {
        "buckeye": "inspect_buckeye",
        "csv": "inspect_buckeye",
        "fave": "inspect_fave",
        "ilg": "inspect_ilg",
        "labbcat": "inspect_labbcat",
        "partitur": "inspect_partitur",
        "timit": "inspect_timit",
    }

    with CorpusContext(config) as c:
        print('loading')

        # Only "fave" is compared case-insensitively; everything else is exact.
        if textgrid_format.lower() == "fave":
            format_key = "fave"
        else:
            format_key = textgrid_format
        inspector = getattr(pgio, inspector_names.get(format_key, "inspect_mfa"))
        parser = inspector(corpus_dir)
        c.load(parser, corpus_dir)
Пример #4
0
def loading(config, corpus_dir, textgrid_format):
    """Import the corpus in ``corpus_dir`` and record how long it took.

    Prints a message and returns early when the corpus already exists.
    Prints a message and exits the process with status 1 when
    ``corpus_dir`` does not exist. Otherwise picks a ``pgio`` inspector
    from ``textgrid_format``, attaches the module-level ``call_back`` to
    the parser, times the load, prints the duration and passes it to
    ``save_performance_benchmark`` under the label 'import'.

    ``textgrid_format`` is matched exactly (case-sensitive), except for
    "fave", which is matched case-insensitively. Any unrecognised value
    falls back to the MFA inspector.
    """
    with CorpusContext(config) as probe:
        already_loaded = probe.exists()
    if already_loaded:
        print('Corpus already loaded, skipping import.')
        return
    if not os.path.exists(corpus_dir):
        print('The path {} does not exist.'.format(corpus_dir))
        sys.exit(1)

    # Format name -> name of the pgio inspector function. Names are resolved
    # lazily with getattr so only the selected inspector is looked up.
    # NOTE(review): "csv" is routed to the Buckeye inspector, exactly as in
    # the original if/elif chain -- confirm this is intended.
    inspector_names = {
        "buckeye": "inspect_buckeye",
        "csv": "inspect_buckeye",
        "fave": "inspect_fave",
        "ilg": "inspect_ilg",
        "labbcat": "inspect_labbcat",
        "partitur": "inspect_partitur",
        "timit": "inspect_timit",
    }

    with CorpusContext(config) as c:
        print('loading')

        # Only "fave" is compared case-insensitively; everything else is exact.
        if textgrid_format.lower() == "fave":
            format_key = "fave"
        else:
            format_key = textgrid_format
        inspector = getattr(pgio, inspector_names.get(format_key, "inspect_mfa"))
        parser = inspector(corpus_dir)
        parser.call_back = call_back

        # Time only the load itself, not the inspection above.
        started = time.time()
        c.load(parser, corpus_dir)
        finished = time.time()
        time_taken = finished - started
        print('Loading took: {}'.format(time_taken))
    save_performance_benchmark(config, 'import', time_taken)
Пример #5
0
def test_ilg_mismatched(graph_db, ilg_test_dir):
    """Loading ``mismatched.txt`` must raise ``ILGWordMismatchError``.

    The parser is built from ``basic.txt`` and then used to load
    ``mismatched.txt`` into the ``mismatch`` corpus.
    """
    basic_path = os.path.join(ilg_test_dir, 'basic.txt')
    mismatched_path = os.path.join(ilg_test_dir, 'mismatched.txt')

    ilg_parser = inspect_ilg(basic_path)

    with CorpusContext('mismatch', **graph_db) as corpus:
        # reset() stays outside pytest.raises so only load() may raise.
        corpus.reset()
        with pytest.raises(ILGWordMismatchError):
            corpus.load(ilg_parser, mismatched_path)
Пример #6
0
def test_ilg_mismatched(graph_db, ilg_test_dir):
    """Expect ``ILGWordMismatchError`` when loading ``mismatched.txt``.

    The parser comes from inspecting ``basic.txt``; the load target is
    ``mismatched.txt`` in the same test directory.
    """
    good_file = os.path.join(ilg_test_dir, 'basic.txt')
    bad_file = os.path.join(ilg_test_dir, 'mismatched.txt')

    parser_from_good = inspect_ilg(good_file)

    with CorpusContext('mismatch', **graph_db) as ctx:
        # Keep reset() outside the raises block: only load() is expected to fail.
        ctx.reset()
        with pytest.raises(ILGWordMismatchError):
            ctx.load(parser_from_good, bad_file)
Пример #7
0
def loading(config, corpus_dir, textgrid_format):
    """Load the corpus, unless a database for it already exists.

    Prints a message and returns early when the corpus already exists.
    Prints a message and exits the process with status 1 when
    ``corpus_dir`` does not exist. Otherwise the corpus is imported with
    the inspector selected by ``textgrid_format``, the import is timed, and
    the duration is passed to ``save_performance_benchmark`` under the
    label 'import'.

    ``textgrid_format`` is case-insensitive. Recognised values:

    - "BUCKEYE" / "B"
    - "CSV"
    - "FAVE" / "F"
    - "ILG"
    - "LABBCAT" / "L"
    - "PARTITUR" / "P"
    - "MAUS" / "W"
    - "TIMIT" / "T"

    Any other value falls back to the MFA inspector.
    """

    ## first check if a database for the corpus
    ## has already been created
    with CorpusContext(config) as c:
        exists = c.exists()
    if exists:
        print('Corpus already loaded, skipping import.')
        return
    if not os.path.exists(corpus_dir):
        print('The path {} does not exist.'.format(corpus_dir))
        sys.exit(1)

    ## if there is no database file,
    ## begin with importing the corpus
    ## (normalise once so every comparison below is against upper case)
    textgrid_format = textgrid_format.upper()
    with CorpusContext(config) as c:
        print('loading')

        ## Use the appropriate importer based
        ## on the format of the corpus
        if textgrid_format in ("BUCKEYE", "B"):
            parser = pgio.inspect_buckeye(corpus_dir)
        elif textgrid_format == "CSV":
            ## NOTE(review): CSV is routed to the Buckeye inspector,
            ## unchanged from the original -- confirm this is intended.
            parser = pgio.inspect_buckeye(corpus_dir)
        elif textgrid_format in ("FAVE", "F"):
            ## Previously compared textgrid_format.lower() against upper-case
            ## literals, which never matched, so FAVE fell through to MFA.
            parser = pgio.inspect_fave(corpus_dir)
        elif textgrid_format == "ILG":
            parser = pgio.inspect_ilg(corpus_dir)
        elif textgrid_format in ("LABBCAT", "L"):
            parser = pgio.inspect_labbcat(corpus_dir)
        elif textgrid_format in ("PARTITUR", "P"):
            parser = pgio.inspect_partitur(corpus_dir)
        elif textgrid_format in ("MAUS", "W"):
            parser = pgio.inspect_maus(corpus_dir)
        elif textgrid_format in ("TIMIT", "T"):
            parser = pgio.inspect_timit(corpus_dir)
        else:
            parser = pgio.inspect_mfa(corpus_dir)
        parser.call_back = call_back

        ## time only the load itself
        beg = time.time()
        c.load(parser, corpus_dir)
        end = time.time()
        time_taken = end - beg
        print('Loading took: {}'.format(time_taken))
    save_performance_benchmark(config, 'import', time_taken)
Пример #8
0
def test_inspect_ilg_directory(ilg_test_dir):
    """Inspecting the ILG test directory yields two annotation types."""
    dir_parser = inspect_ilg(ilg_test_dir)
    type_count = len(dir_parser.annotation_types)
    assert type_count == 2
Пример #9
0
def test_inspect_ilg(ilg_test_dir):
    """Inspecting ``basic.txt`` yields two annotation types.

    The second annotation type must use '.' as its ``trans_delimiter``.
    """
    file_parser = inspect_ilg(os.path.join(ilg_test_dir, 'basic.txt'))
    assert len(file_parser.annotation_types) == 2
    second_type = file_parser.annotation_types[1]
    assert second_type.trans_delimiter == '.'
Пример #10
0
def test_inspect_ilg_directory(ilg_test_dir):
    """Inspecting the ILG test directory yields two annotation tiers."""
    dir_parser = inspect_ilg(ilg_test_dir)
    tier_count = len(dir_parser.annotation_tiers)
    assert tier_count == 2
Пример #11
0
def test_inspect_ilg(ilg_test_dir):
    """Inspecting ``basic.txt`` yields two annotation tiers.

    The second tier must use '.' as its ``trans_delimiter``.
    """
    file_parser = inspect_ilg(os.path.join(ilg_test_dir, 'basic.txt'))
    assert len(file_parser.annotation_tiers) == 2
    second_tier = file_parser.annotation_tiers[1]
    assert second_tier.trans_delimiter == '.'