Пример #1
0
def test_dump(capsys, mocker, tmpdir):
    tmpdir.join('data').mkdir()
    dump(mocker.Mock(repos=CLTS(str(tmpdir))), test=True)
    out, err = capsys.readouterr()
    assert Path(str(tmpdir)).joinpath('data', 'graphemes.tsv').exists()
    stats(mocker.Mock(repos=CLTS(str(tmpdir))))
    out, err = capsys.readouterr()
    assert 'Unique graphemes' in out
Пример #2
0
def main(args=None):  # pragma: no cover
    parser = ArgumentParserWithLogging('pyclts')
    parser.add_argument(
        "--repos",
        default=CLTS(repos=Path(__file__).parent.parent.parent),
        type=CLTS,
        help="Path to clts repos.")
    parser.add_argument("--format",
                        default="pipe",
                        help="Format of tabular output.")
    parser.add_argument('--nonames',
                        help="do not report the sound names in the output",
                        action='store_true')
    parser.add_argument('--filter',
                        help="only list generated sounds",
                        default='')
    parser.add_argument('--data',
                        help="specify the transcription data you want to load",
                        default="phoible")
    parser.add_argument(
        '--system',
        help="specify the transcription system you want to load",
        default="bipa")

    res = parser.main(args=args)
    if args is None:  # pragma: no cover
        sys.exit(res)
Пример #3
0
def main(args):  # pragma: no cover
    data = Data()
    clts_repos = Path(__file__).parent.parent.parent.parent.resolve() / 'clts-data'
    clts_repos = CLTS(clts_repos)
    print(clts_repos.repos)
    version = 'v2.1.0' # assert_release(clts_repos.repos)

    for rec in Database.from_file(args.cldf.bibpath, lowercase=True):
        data.add(common.Source, rec.id, _obj=bibtex2source(rec))

    dataset = common.Dataset(
        id='clts',
        name="CLTS {0}".format(version),
        publisher_name="Max Planck Institute for Evolutionary Anthropology",
        publisher_place="Leipzig",
        publisher_url="http://www.eva.mpg.de",
        license="http://creativecommons.org/licenses/by/4.0/",
        contact='*****@*****.**',
        domain='clts.clld.org',
        jsondata={
            'license_icon': 'cc-by.png',
            'license_name': 'Creative Commons Attribution 4.0 International License'})
    DBSession.add(dataset)
    for i, name in enumerate([
        'Johann-Mattis List',
        'Cormac Anderson',
        'Tiago Tresoldi',
        'Robert Forkel',
    ]):
        c = common.Contributor(id=slug(name), name=name)
        dataset.editors.append(common.Editor(contributor=c, ord=i))

    for line in args.cldf['data/features.tsv']:
        data.add(
            models.Feature,
            line['ID'],
            id=line['ID'],
            name='{} {}: {}'.format(line['TYPE'], line['FEATURE'], line['VALUE']),
            sound_type=line['TYPE'],
            feature=line['FEATURE'],
            value=line['VALUE'],
        )

    DBSession.add(models.SoundSegment(
        id='NA',
        name='<NA>',
        description='<NA>',
        type='marker',
        generated=True,
        unicode='',
        color='#bbbbbb',
    ))
    for line in args.cldf['data/sounds.tsv']:
        s = data.add(
            models.SoundSegment,
            line['ID'],
            id=line['ID'],
            name=line['GRAPHEME'],
            description=line['NAME'],
            type=line['TYPE'],
            generated=line['GENERATED'],
            unicode=' / '.join(line['UNICODE']),
            color=clts_repos.soundclass('color').resolve_sound(line['GRAPHEME']),
        )
        if s.color == '0':
            s.color = '#bbbbbb'
        assert s.color in LEGEND
    DBSession.flush()

    seen = set()
    for line in args.cldf['data/sounds.tsv']:
        for fid in line['FEATURES']:
            spk, fpk = data['SoundSegment'][line['ID']].pk, data['Feature'][fid].pk
            if (spk, fpk) not in seen:
                DBSession.add(models.SoundSegmentFeature(soundsegment_pk=spk, feature_pk=fpk))
                seen.add((spk, fpk))

    english = data.add(
        common.Language, 'eng',
        id='eng',
        name='English')

    for line in args.cldf['sources/index.tsv']:
        c = data.add(
            models.Transcription,
            line['NAME'],
            id=line['NAME'],
            name=line['NAME'],
            description=line['DESCRIPTION'].replace(':bib:', '/sources/'),
            datatype=getattr(models.Datatype, line['TYPE'])
        )
        for ref in line.get('REFS', []):
            common.ContributionReference(source=data['Source'][ref], contribution=c)

    sound_url_template = args.cldf['data/graphemes.tsv', 'SOUND'].valueUrl
    image_url_template = args.cldf['data/graphemes.tsv', 'IMAGE'].valueUrl

    for line in args.cldf['data/graphemes.tsv']:
        key = line['DATASET'] + ':' + line['NAME'] + ':' + line['GRAPHEME']
        if key not in data['Grapheme']:
            sound_id = line['NAME'].replace(' ', '_')
            vs = data['ValueSet'].get((line['DATASET'], line['NAME']))
            if not vs:
                try:
                    vs = data.add(
                        common.ValueSet,
                        (line['DATASET'], line['NAME']),
                        id=key,
                        description=line['NAME'],
                        language=english,
                        contribution=data['Transcription'][line['DATASET']],
                        parameter=data['SoundSegment'][sound_id]
                    )
                except:
                    print(line)
                    raise
            data.add(
                models.Grapheme,
                key,
                id=key,
                name=line['GRAPHEME'],
                description=line['NAME'],
                url=line['URL'].unsplit() if line['URL'] else None,
                audio=sound_url_template.expand(line) if line['SOUND'] else None,
                image=image_url_template.expand(line) if line['IMAGE'] else None,
                valueset=vs
            )
Пример #4
0
def test_make_app_data(capsys, mocker, tmpdir):
    tmpdir.join('app').mkdir()
    _make_app_data(mocker.Mock(repos=CLTS(str(tmpdir))), test=True)
    assert Path(str(tmpdir)).joinpath('app', 'data.js').exists()
Пример #5
0
def test_stats(capsys, mocker, tmpdir):
    dstats(mocker.Mock(system='bipa', repos=CLTS(str(tmpdir))))
    out, err = capsys.readouterr()
    assert 'id' in out
    stats(mocker.Mock(system='bipa', repos=CLTS('.')))
Пример #6
0
def test_iter_sources(sources, tmpdir):
    api = CLTS(repos=str(tmpdir))
    srcs = list(api.iter_sources(type='td'))
    assert len(srcs[0][1]) == 0
    assert srcs[0][0]['NAME'] == 'test'