Example #1
def test_db(tmpdir, dataset, mocker, capsys):
    db = Database(str(tmpdir.join('lexibank.sqlite')))
    db.load(dataset)
    db.create(exists_ok=True)
    # Re-creating an existing database is only allowed with exists_ok or force:
    with pytest.raises(ValueError):
        db.create()
    db.create(force=True)
    db.load(dataset)
    db.load_glottolog_data(dataset.glottolog)
    db.load_concepticon_data(mocker.Mock(conceptsets={}))
    for sql in db.sql:
        db.fetchall(sql)
    with db.connection() as conn:
        db.fetchall('select * from dataset', conn=conn, verbose=True)
    out, _ = capsys.readouterr()
    assert 'select' in out

    db.create(force=True)
    db.load(dataset)
    cldf_ds = dataset.cldf_reader()
    cols = cldf_ds['FormTable'].tableSchema.columns
    # Adding a custom column to the CLDF schema is picked up on reload ...
    cols.append(Column(name='custom'))
    cldf_ds.write_metadata()
    db.load(dataset)
    # ... but redefining the column with a different datatype must raise:
    cols.pop()
    cols.append(Column(name='custom', datatype='integer'))
    cldf_ds.write_metadata()
    with pytest.raises(ValueError):
        db.load(dataset)
    cols.pop()
    cldf_ds.write_metadata()
    db.load(dataset)
Example #2
def test_db_multiple_datasets(tmpdir, dataset, dataset_cldf, capsys):
    db = Database(str(tmpdir.join('lexibank.sqlite')))
    db.load(dataset)
    db.load(dataset_cldf, verbose=True)
    with db.connection() as conn:
        res = db.fetchall('select `id`, `name` from LanguageTable', conn=conn)
        assert len(res) == 3
        assert ('1', 'Lang CLDF') in [(r[0], r[1]) for r in res]
        res = db.fetchall('select `id`, `value` from FormTable', conn=conn)
        assert ('1', 'abc') in [(r[0], r[1]) for r in res]
Example #3
def requirements(args):
    if args.cfg.datasets:
        print(
            '-e git+https://github.com/clld/glottolog.git@{0}#egg=pyglottolog'.
            format(git_hash(args.cfg.datasets[0].glottolog.repos)))
        print(
            '-e git+https://github.com/clld/concepticon-data.git@{0}#egg=pyconcepticon'
            .format(git_hash(args.cfg.datasets[0].concepticon.repos)))
    if pylexibank.__version__.endswith('dev0'):
        print(
            '-e git+https://github.com/lexibank/pylexibank.git@{0}#egg=pylexibank'
            .format(git_hash(Path(pylexibank.__file__).parent.parent.parent)))
    db = Database(args.db)
    db.create(exists_ok=True)
    for r in db.fetchall('select id, version from dataset'):
        print(
            '-e git+https://github.com/lexibank/{0}.git@{1}#egg=lexibank_{0}'.
            format(*r))
Example #4
def test_db_multiple_datasets_error(tmpdir, dataset, dataset_factory):
    import shutil
    from clldutils.jsonlib import load, dump

    db = Database(str(tmpdir.join('lexibank.sqlite')))
    assert not db.fname.exists()
    db.load(dataset)

    ds_dir = dataset.dir.parent / 'dbtest'
    shutil.copytree(str(dataset.dir), str(ds_dir))
    # Now modify the CLDF data:
    md = load(ds_dir / 'cldf' / 'cldf-metadata.json')
    for t in md['tables']:
        if t['url'] == 'parameters.csv':
            for col in t['tableSchema']['columns']:
                if col['name'] == 'Chinese':
                    col['name'] = 'chinese'
                    col['datatype'] = 'integer'
    dump(md, ds_dir / 'cldf' / 'cldf-metadata.json')

    ds2 = dataset_factory('dbtest.td')
    with pytest.raises(ValueError) as e:
        db.load(ds2)
    assert 'redefined' in str(e.value)
Example #5
def _unload(ds, **kw):
    db = Database(kw['db'])
    db.create(exists_ok=True)
    db.unload(ds)
Example #6
def _load(ds, **kw):
    db = Database(kw['db'])
    db.create(exists_ok=True)
    db.load(ds)
    db.load_concepticon_data(ds.concepticon)
    db.load_glottolog_data(ds.glottolog)
Example #7
def ls(args):
    """
    lexibank ls [COLS]+

    column specification:
    - license
    - lexemes
    - macroareas
    """
    db = Database(args.db)
    db.create(exists_ok=True)
    in_db = {
        r[0]: r[1]
        for r in db.fetchall('select id, version from dataset')
    }
    # FIXME: how to smartly choose columns?
    table = Table('ID', 'Title')
    cols = OrderedDict([(col, {}) for col in args.args if col in [
        'version',
        'location',
        'changes',
        'license',
        'all_lexemes',
        'lexemes',
        'concepts',
        'languages',
        'families',
        'varieties',
        'macroareas',
    ]])
    tl = 40  # width at which dataset titles are truncated
    if cols:
        tl = 25
        table.columns.extend(col.capitalize() for col in cols)

    for col, sql in [
        ('languages', 'glottocodes_by_dataset'),
        ('concepts', 'conceptsets_by_dataset'),
        ('lexemes', 'mapped_lexemes_by_dataset'),
        ('all_lexemes', 'lexemes_by_dataset'),
        ('macroareas', 'macroareas_by_dataset'),
        ('families', 'families_by_dataset'),
    ]:
        if col in cols:
            cols[col] = {r[0]: r[1] for r in db.fetchall(sql)}
    for ds in args.cfg.datasets:
        row = [
            colored(ds.id, 'green' if ds.id in in_db else 'red'),
            truncate_with_ellipsis(ds.metadata.title or '', width=tl),
        ]
        for col in cols:
            if col == 'version':
                row.append(git_hash(ds.dir))
            elif col == 'location':
                row.append(colored(str(ds.dir), 'green'))
            elif col == 'changes':
                row.append(ds.git_repo.is_dirty())
            elif col == 'license':
                lic = licenses.find(ds.metadata.license or '')
                row.append(lic.id if lic else ds.metadata.license)
            elif col in [
                    'languages', 'concepts', 'lexemes', 'all_lexemes',
                    'families'
            ]:
                row.append(float(cols[col].get(ds.id, 0)))
            elif col == 'macroareas':
                row.append(', '.join(
                    sorted((cols[col].get(ds.id) or '').split(','))))
            else:
                row.append('')

        table.append(row)
    totals = ['zztotal', len(args.cfg.datasets)]
    for i, col in enumerate(cols):
        if col in ['lexemes', 'all_lexemes']:
            totals.append(sum([r[i + 2] for r in table]))
        elif col == 'languages':
            totals.append(
                float(
                    db.fetchone(
                        "SELECT count(distinct glottocode) FROM languagetable")
                    [0]))
        elif col == 'concepts':
            totals.append(
                float(
                    db.fetchone(
                        "SELECT count(distinct concepticon_id) FROM parametertable"
                    )[0]))
        elif col == 'families':
            totals.append(
                float(
                    db.fetchone(
                        "SELECT count(distinct family) FROM languagetable")
                    [0]))
        else:
            totals.append('')
    table.append(totals)
    print(
        table.render(tablefmt='simple',
                     sortkey=lambda r: r[0],
                     condensed=False,
                     floatfmt=',.0f'))
Example #8
def db(args):
    db = str(Database(args.db).fname)
    args.log.info('connecting to {0}'.format(colored(db, 'green')))
    check_call(['sqlite3', db])
Example #9
def get_db(args):
    db = Database(args.db)
    db.create(exists_ok=True)
    return db
Example #10
def run(args):
    db = str(Database(args.db).fname)
    args.log.info('connecting to {0}'.format(termcolor.colored(db, 'green')))
    subprocess.check_call(['sqlite3', db])
Example #11
def db(tmp_path):
    return Database(tmp_path / 'lexibank.sqlite')
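
The fixture in Example #11 pairs naturally with the create() semantics exercised in Example #1. The following is a minimal sketch, not code from the project: the import path pylexibank.db and the test name test_create_semantics are assumptions; the exists_ok/force behaviour shown in the comments follows Example #1.

import pytest

from pylexibank.db import Database  # assumed import path


@pytest.fixture
def db(tmp_path):
    # Same fixture as in Example #11: a Database backed by a temporary SQLite file.
    return Database(tmp_path / 'lexibank.sqlite')


def test_create_semantics(db):
    db.create(exists_ok=True)  # first call creates the database file
    with pytest.raises(ValueError):
        db.create()            # recreating without exists_ok/force raises
    db.create(force=True)      # force=True drops and recreates the database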