Example #1
def test_create(api, wiki, capsys, tmp_path):
    cldf_repos = tmp_path
    cldf.create(StructureDataset.in_dir(cldf_repos / 'cldf'), api,
                Path(__file__).parent / 'glottolog')
    #captured = capsys.readouterr()
    #assert 'inconsistent' in captured.out
    ds = StructureDataset.from_metadata(cldf_repos / 'cldf' /
                                        'StructureDataset-metadata.json')
    assert len(list(ds['ValueTable'])) == 1
    assert ds['contributors.csv', 'Photo'].valueUrl.expand(list(ds['contributors.csv'])[0]) == \
           'https://glottobank.org/photos/abc'
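
The last assertion exercises CSVW valueUrl expansion: the Photo column's URI template is filled in from a row's values. A minimal sketch of how such a column might be declared when the dataset is built (the template string is an assumption, inferred from the asserted URL):

ds = StructureDataset.in_dir('cldf')
ds.add_table(
    'contributors.csv',
    'ID',
    # assumed template; expanding it with {'Photo': 'abc'} yields .../photos/abc
    {'name': 'Photo', 'valueUrl': 'https://glottobank.org/photos/{Photo}'},
)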
Example #2
def run(args):
    cldf = StructureDataset.in_dir(args.cldf_repos / 'cldf')
    if args.glottolog_version != 'test':  # pragma: no cover
        with Catalog(args.glottolog, args.glottolog_version) as glottolog:
            write_metadata(cldf, args, glottolog)
    else:
        write_metadata(cldf, args, None)
    write_schema(cldf)
    cldf.write(**get_data(cldf, args))
    shutil.copy(str(args.repos.path('LICENSE.txt')), str(args.cldf_repos))
    if not args.dev:
        cldf.validate(log=args.log)
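
run reads everything from the args namespace; a minimal sketch of invoking it with a SimpleNamespace, assuming the module-level helpers (write_metadata, write_schema, get_data) are in scope (attribute names inferred from the function body; the paths and the repos stub are hypothetical):

import logging
from pathlib import Path
from types import SimpleNamespace

class _Repos:
    """Hypothetical stand-in: run() only needs .path('LICENSE.txt')."""
    def path(self, *comps):
        return Path('.').joinpath(*comps)

run(SimpleNamespace(
    cldf_repos=Path('glottolog-cldf'),  # output directory (hypothetical)
    glottolog=Path('glottolog'),        # Glottolog clone (hypothetical)
    glottolog_version='test',           # take the branch that skips Catalog
    repos=_Repos(),
    dev=True,                           # skip validation
    log=logging.getLogger(__name__),
))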
Example #3
def make_cldf(db, out, fid):
    # Initialize a CLDF dataset in the output directory, using the appropriate module:
    ds = StructureDataset.in_dir(out)

    # We add the WALS language metadata:
    ds.add_component('LanguageTable', 'Genus', 'Family')

    # And some metadata about the feature:
    ds.add_component('ParameterTable', 'Authors', 'Url', 'Area')
    ds.add_component('CodeTable')

    # Now we collect the data by querying the database:
    values, languages = [], []

    lids = defaultdict(dict)
    for lpk, ids in groupby(db.execute(SQL_IDENTIFIERS), lambda r: r[0]):
        for itype, names in groupby(ids, lambda rr: rr[1]):
            names = [n[2] for n in names]
            if len(names) == 1:
                # only add identifiers for equivalent languoids, ignore partial matches.
                lids[lpk][itype] = names[0]

    # We store the sources and references per datapoint:
    sources, refs = defaultdict(list), defaultdict(list)
    for vspk, rs in groupby(db.execute(SQL_SOURCES), lambda r: r[0]):
        for r in rs:
            ref = r[2]
            if r[1]:
                ref += '[{0}]'.format(r[1])  # add the page info in the correct format.
            refs[vspk].append(ref)
            sources[vspk].append(Source(r[3], r[2], author=r[4], year=r[5], title=r[6]))

    codes = {}
    for row in db.execute(SQL_VALUES.format(fid)):
        lpk, lid, lname, vsid, denumber, dename, lat, lon, vspk, gname, fname = row
        ids = lids[lpk]
        if vspk in sources:
            ds.sources.add(*sources[vspk])
        languages.append(dict(
            ID=lid,
            Name=lname,
            Latitude=lat,
            Longitude=lon,
            Glottocode=ids.get('glottolog'),
            ISO639P3code=ids.get('iso639-3'),
            Genus=gname,
            Family=fname,
        ))
        values.append(dict(
            ID=vsid,
            Language_ID=lid,
            Parameter_ID=fid,
            Value=denumber,
            Code_ID='{0}-{1}'.format(fid, denumber),
            Source=refs.get(vspk, []),
        ))
        codes[denumber] = {
            'ID': '{0}-{1}'.format(fid, denumber),
            'Name': dename,
            'Parameter_ID': fid,
        }

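    # Note: fname is rebound below, from the per-row family name to the feature name.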
    fname, fauthors, aname = list(db.execute(SQL_FEATURE.format(fid)))[0]
    ds.write(
        ValueTable=values,
        LanguageTable=languages,
        ParameterTable=[{
            'ID': fid,
            'Name': fname,
            'Area': aname,
            'Authors': fauthors,
            'Url': 'http://wals.info/feature/' + fid}],
        CodeTable=codes.values(),
    )
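
A minimal sketch of driving make_cldf, assuming the SQL_* queries and the imports the function relies on (defaultdict, groupby, pycldf's Source) are in scope; the database path and feature ID are illustrative:

import sqlite3
from pathlib import Path

db = sqlite3.connect('wals.sqlite')  # hypothetical SQLite export of the WALS database
make_cldf(db, Path('cldf'), '1A')    # '1A' is one WALS feature ID, chosen for illustration
db.close()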
Example #4
def main(scripts, dev, glr):
    cldf_dir = Path('cldf')
    bib = parse_string(read_text(cldf_dir / 'sources.bib'),
                       bib_format='bibtex')
    write_text(cldf_dir / 'sources.bib', bib.lower().to_string('bibtex'))

    glottolog = Glottolog(glr)
    ds = StructureDataset.in_dir(cldf_dir)
    ds.tablegroup.notes.append(
        OrderedDict([('dc:title', 'environment'),
                     ('properties',
                      OrderedDict([
                          ('glottolog_version', git_describe(glottolog.repos)),
                      ]))]))
    ds.add_columns('ValueTable', {
        'name': 'Marginal',
        'datatype': 'boolean'
    }, {
        'name': 'Allophones',
        'separator': ' '
    }, 'Contribution_ID')
    features = [
        "tone", "stress", "syllabic", "short", "long", "consonantal",
        "sonorant", "continuant", "delayedRelease", "approximant", "tap",
        "trill", "nasal", "lateral", "labial", "round", "labiodental",
        "coronal", "anterior", "distributed", "strident", "dorsal", "high",
        "low", "front", "back", "tense", "retractedTongueRoot",
        "advancedTongueRoot", "periodicGlottalSource", "epilaryngealSource",
        "spreadGlottis", "constrictedGlottis", "fortis",
        "raisedLarynxEjective", "loweredLarynxImplosive", "click"
    ]
    ds.add_component('ParameterTable', 'SegmentClass', *features)
    ds.add_component('LanguageTable')
    ds.add_table(
        'contributions.csv', 'ID', 'Name', 'Contributor_ID', {
            'name': 'Source',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
            'separator': ';'
        }, 'URL')
    ds.add_table(
        'contributors.csv',
        'ID',
        'Name',
        'Description',
        'Readme',
        'Contents',
        {
            'name': 'Source',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
            'separator': ';'
        },
        'URL',
    )

    def read(what):
        return reader(scripts / 'to_cldf' / 'cldf' / what, namedtuples=True)

    languoids = {l.id: l for l in glottolog.languoids()}

    values, segments, languages, inventories, sources = [], [], {}, {}, []
    for contrib in read('contributors.csv'):
        sources.append(
            dict(
                ID=contrib.Name,
                Name=contrib.Contributor,
                Description=contrib.Description,
                Readme=desc(dev, contrib.Name),
                Contents=contrib.Contents,
                Source=[
                    c.strip().lower() for c in contrib.Citation.split(';')
                ],
                URL=contrib.SourceURL if contrib.SourceURL != 'NA' else '',
            ))

    pid_map = {}
    for row in read('parameters.csv'):
        pid = md5(row.Description.encode('utf8')).hexdigest().upper()
        pid_map[row.ID] = pid
        segments.append(
            dict(ID=pid,
                 Name=row.Name,
                 Description=row.Description,
                 SegmentClass=row.SegmentClass,
                 **{f: getattr(row, f)
                    for f in features}))

    src = {}
    for row in read('contributions.csv'):
        src[row.ID] = row.References.split(';') if row.References != 'no source given' else []
        src[row.ID] = [sid.lower() for sid in src[row.ID]]
        inventories[row.ID] = dict(ID=row.ID,
                                   Name=row.Name,
                                   Contributor_ID=row.Contributor_ID,
                                   URL=row.URI if row.URI != 'NA' else '',
                                   Source=src[row.ID])

    uniq = set()
    for row in read('values.csv'):
        pk = (row.Language_ID, row.Parameter_ID, row.Contribution_ID)
        if pk in uniq:
            print('skipping duplicate phoneme {0}'.format(pk))
            continue
        uniq.add(pk)
        lid = row.Language_ID if row.Language_ID in languoids else slug(
            inventories[row.Contribution_ID]['Name'])
        if lid not in languages:
            #
            # FIXME: Language_ID == 'NA' for three inventories! This must be mapped!
            #
            lang = languoids.get(lid)
            languages[lid] = dict(
                ID=lid,
                Name=lang.name if lang else None,
                Glottocode=lang.id if lang else None,
                ISO639P3code=row.ISO639P3code
                if row.ISO639P3code != 'NA' else None,
            )
        values.append(
            dict(
                ID=row.ID,
                Language_ID=lid,
                Parameter_ID=pid_map[row.Parameter_ID],
                Contribution_ID=row.Contribution_ID,
                Value=row.Name,
                Marginal={'TRUE': True, 'FALSE': False}.get(
                    row.Marginal.upper()),  # FALSE|TRUE|NA -> True|False|None
                Allophones=row.Allophones.split()
                if row.Allophones != 'NA' else [],
                Source=src[row.Contribution_ID],
            ))

    ds.write(
        **{
            'ValueTable': values,
            'LanguageTable': languages.values(),
            'ParameterTable': segments,
            'contributions.csv': inventories.values(),
            'contributors.csv': sources
        })
    ds.validate(logging.getLogger(__name__))
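
main wires together three checkouts; a minimal sketch of calling it, assuming the helpers used above (desc, slug, reader) are importable from the original module (all paths hypothetical):

from pathlib import Path

main(Path('phoible-scripts'), Path('phoible-dev'), Path('glottolog'))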
Example #5
from pycldf import StructureDataset
from pycldf.sources import Source

formtable = []  # rows for the ValueTable, collected below
idx = 1
for line in forms:
    data = line.strip().split()
    lid = data[0]
    for i, p in enumerate(data[1:]):
        pid = str(i+1)
        formtable += [{
            "ID": '{0}-{1}-{2}'.format(lid, pid, idx),
            "Value": p,
            "Language_ID": lid,
            "Parameter_ID": pid,
            "Source": ["Szeto2018"]
            }]
        idx += 1

ds = StructureDataset.in_dir('cldf')
ds.add_sources(Source(
    'article', 'Szeto2018',
    author='Szeto, Pui Yiu and Ansaldo, Umberto and Matthews, Steven',
    journal='Linguistic Typology',
    pages='233-275',
    title='Typological variation across Mandarin dialects: An areal perspective with a quantitative approach',
    doi='10.1515/lingty-2018-0009',
))

ds.add_component('ParameterTable')
ds.add_component('LanguageTable')
ds.write(ValueTable=formtable, ParameterTable=parametertable,
         LanguageTable=languagetable)

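# Note: ds.write() above already serializes the metadata and the sources,
# so the two calls below are redundant but harmless.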
ds.write_metadata()
ds.write_sources()
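
The snippet leaves forms, parametertable and languagetable undefined; a minimal sketch of inputs matching the shapes the loop and ds.write() expect (all IDs, names and values invented for illustration):

forms = [
    'beijing 1 2',  # hypothetical: a language ID followed by one value per parameter
    'xian 2 2',
]
parametertable = [{'ID': '1', 'Name': 'feature-1'}, {'ID': '2', 'Name': 'feature-2'}]
languagetable = [{'ID': 'beijing', 'Name': 'Beijing Mandarin'},
                 {'ID': 'xian', 'Name': "Xi'an Mandarin"}]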
Example #6
def main(scripts, dev, glr):
    cldf_dir = Path('cldf')
    bib = parse_string(read_text(cldf_dir / 'sources.bib'), bib_format='bibtex')
    for _, e in bib.entries.items():
        for field in e.fields:
            e.fields[field] = e.fields[field].replace('\\', '')
    write_text(cldf_dir / 'sources.bib', bib.lower().to_string('bibtex'))

    glottolog = Glottolog(glr)

    ds = StructureDataset.in_dir(cldf_dir)

    def describe_repos(r, org, name=None):
        return OrderedDict([
            ('dc:title', '{0}/{1}'.format(org, name or r.name)),
            ('dc:description', git_describe(r))])

    ds.tablegroup.common_props['prov:wasDerivedFrom'] = [
        describe_repos(dev, 'phoible'),
        describe_repos(scripts, 'bambooforest'),
        describe_repos(glottolog.repos, 'clld'),
    ]
    ds.tablegroup.common_props['prov:wasGeneratedBy'] = describe_repos(
        Path(__file__).parent, 'cldf-datasets', name='phoible')

    ds.add_columns(
        'ValueTable',
        {'name': 'Marginal', 'datatype': 'boolean'},
        {'name': 'Allophones', 'separator': ' '},
        'Contribution_ID')
    features = [
        "tone", "stress", "syllabic", "short", "long", "consonantal",
        "sonorant", "continuant", "delayedRelease", "approximant", "tap",
        "trill", "nasal", "lateral", "labial", "round", "labiodental",
        "coronal", "anterior", "distributed", "strident", "dorsal", "high",
        "low", "front", "back", "tense", "retractedTongueRoot",
        "advancedTongueRoot", "periodicGlottalSource", "epilaryngealSource",
        "spreadGlottis", "constrictedGlottis", "fortis",
        "raisedLarynxEjective", "loweredLarynxImplosive", "click",
    ]
    ds.add_component('ParameterTable', 'SegmentClass', *features)
    ds.add_component('LanguageTable', 'Family_Glottocode', 'Family_Name')
    table = ds.add_table(
        'contributions.csv',
        'ID',
        'Name',
        'Contributor_ID',
        {'name': 'Source', 'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source', 'separator': ';'},
        'URL',
        {'name': 'count_phonemes', 'required': True, 'datatype': {'base': 'integer', 'minimum': 0}},
        {'name': 'count_consonants', 'required': True, 'datatype': {'base': 'integer', 'minimum': 0}},
        {'name': 'count_vowels', 'required': True, 'datatype': {'base': 'integer', 'minimum': 0}},
        {'name': 'count_tones', 'datatype': {'base': 'integer', 'minimum': 0}, 'null': 'NA'},
    )
    table.tableSchema.primaryKey = ['ID']
    table.tableSchema.foreignKeys.append(ForeignKey.fromdict(dict(
        columnReference='Contributor_ID',
        reference=dict(resource='contributors.csv', columnReference='ID'))))
    table.common_props['dc:conformsTo'] = None
    table = ds.add_table(
        'contributors.csv',
        'ID',
        'Name',
        'Description',
        'Readme',
        'Contents',
        {'name': 'Source', 'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source', 'separator': ';'},
        'URL',
        {'name': 'with_tones', 'datatype': {'base': 'boolean', 'format': '1|0'}},
    )
    table.tableSchema.primaryKey = ['ID']
    table.common_props['dc:conformsTo'] = None

    def read(what):
        return reader(scripts / 'to_cldf' / 'cldf' / what, namedtuples=True)

    languoids = {l.id: l for l in glottolog.languoids()}

    values, segments, languages, inventories, sources = [], [], OrderedDict(), OrderedDict(), []
    with_tones = {}
    for contrib in read('contributors.csv'):
        sources.append(dict(
            ID=contrib.Name,
            Name=contrib.Contributor,
            Description=contrib.Description,
            Readme=desc(dev, contrib.Name),
            Contents=contrib.Contents,
            Source=[c.strip().lower() for c in contrib.Citation.split(';')],
            URL=contrib.SourceURL if contrib.SourceURL != 'NA' else '',
            with_tones=contrib.with_tones == '1',
        ))
        with_tones[contrib.Name] = contrib.with_tones == '1'

    pid_map = {}
    for row in read('parameters.csv'):
        pid = md5(row.Description.encode('utf8')).hexdigest().upper()
        pid_map[row.ID] = (pid, row.SegmentClass)
        segments.append(dict(
            ID=pid,
            Name=row.Name,
            Description=row.Description,
            SegmentClass=row.SegmentClass,
            **{f: getattr(row, f) for f in features}
        ))

    src = {}
    for row in read('contributions.csv'):
        src[row.ID] = row.References.split(';') if row.References != 'no source given' else []
        src[row.ID] = [sid.lower() for sid in src[row.ID]]
        inventories[row.ID] = dict(
            ID=row.ID,
            Name=row.Name,
            Contributor_ID=row.Contributor_ID.upper(),
            URL=row.URI if row.URI != 'NA' else '',
            Source=src[row.ID],
            count_phonemes=0,
            count_consonants=0,
            count_vowels=0,
            count_tones=0,
        )

    uniq, counts = set(), Counter()
    for row in read('values.csv'):
        pk = (row.Language_ID, row.Parameter_ID, row.Contribution_ID)
        if pk in uniq:
            print('skipping duplicate phoneme {0}'.format(pk))
            continue
        uniq.add(pk)
        lid = row.Language_ID if row.Language_ID in languoids else slug(inventories[row.Contribution_ID]['Name'])
        if lid not in languages:
            #
            # FIXME: Language_ID == 'NA' for three inventories! This must be mapped!
            #
            lang = languoids.get(lid)
            fam = lang.lineage[0] if lang and lang.lineage else None
            languages[lid] = dict(
                ID=lid,
                Name=lang.name if lang else None,
                Glottocode=lang.id if lang else None,
                ISO639P3code=row.ISO639P3code if row.ISO639P3code != 'NA' else None,
                Macroarea=lang.macroareas[0].value if lang and lang.macroareas else None,
                Latitude=lang.latitude if lang else None,
                Longitude=lang.longitude if lang else None,
                Family_Glottocode=fam[1] if fam else None,
                Family_Name=fam[0] if fam else None,
            )
        pid, sc = pid_map[row.Parameter_ID]
        counts.update([(row.Contribution_ID, sc)])
        values.append(dict(
            ID=row.ID,
            Language_ID=lid,
            Parameter_ID=pid,
            Contribution_ID=row.Contribution_ID,
            Value=row.Name,
            Marginal={'TRUE': True, 'FALSE': False}.get(row.Marginal.upper()),  # FALSE|TRUE|NA -> True|False|None
            Allophones=row.Allophones.split() if row.Allophones != 'NA' else [],
            Source=src[row.Contribution_ID],
        ))
    for key, count in counts.items():
        inventories[key[0]]['count_{0}s'.format(key[1])] = count
        inventories[key[0]]['count_phonemes'] += count

    for inv in inventories.values():
        if not with_tones[inv['Contributor_ID']]:
            assert inv['count_tones'] == 0
            inv['count_tones'] = 'NA'

    ds.write(**{
        'ValueTable': values,
        'LanguageTable': languages.values(),
        'ParameterTable': segments,
        'contributions.csv': inventories.values(),
        'contributors.csv': sources
    })
    ds.validate(logging.getLogger(__name__))
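
Because contributions.csv declares 'null': 'NA' for count_tones, the literal string NA written above reads back as None; a minimal check, assuming the dataset was written to cldf/:

from pycldf import StructureDataset

ds = StructureDataset.from_metadata('cldf/StructureDataset-metadata.json')
for row in ds['contributions.csv']:
    print(row['ID'], row['count_tones'])  # None where the contributor marks no tones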
Example #7
def cldf(api, outdir, log):
    if not outdir.exists():
        outdir.mkdir()
    for p in outdir.iterdir():
        if p.suffix in ['.bib', '.csv', '.json']:
            p.unlink()
    ds = StructureDataset.in_dir(outdir)
    ds.add_provenance(
        wasDerivedFrom=repos('glottolog', clone=api.repos),
        wasGeneratedBy=repos('pyglottolog', version=pyglottolog.__version__),
    )
    ds.add_component('ParameterTable', {'name': 'type', 'default': None})
    ds.add_component('CodeTable', 'numerical_value')
    ds.add_columns('ValueTable', 'codeReference')
    ds.add_component(
        'LanguageTable',
        dict(name='Countries', separator=';'),
        {
            'name': 'Family_ID',
            'dc:description': 'Glottocode of the top-level genetic unit the '
                              'languoid belongs to'},
        {
            'name': 'Language_ID',
            'dc:description': 'Glottocode of the language-level languoid the '
                              'languoid belongs to (in the case of dialects)'},
    )
    ds.add_foreign_key('LanguageTable', 'Family_ID', 'LanguageTable', 'ID')
    ds.add_foreign_key('LanguageTable', 'Language_ID', 'LanguageTable', 'ID')

    ds['LanguageTable', 'Macroarea'].separator = ';'
    ds['ValueTable', 'Value'].null = ['<NA>']

    data = collections.defaultdict(list)
    data['ParameterTable'].extend([
        dict(ID='level', Name='Level', type='categorical'),
        dict(ID='category', Name='Category', type='categorical'),
        dict(ID='classification', Name='Classification'),
        dict(ID='subclassification', Name='Subclassification'),
        dict(ID='aes', Name='Agglomerated Endangerment Status', type='sequential'),
        dict(ID='med', Name='Most Extensive Description', type='sequential'),
    ])
    for level in api.languoid_levels.values():
        data['CodeTable'].append(dict(
            ID='level-{0}'.format(level.name),
            Parameter_ID='level',
            Name=level.name,
            Description=level.description,
            numerical_value=level.ordinal))
        data['CodeTable'].append(dict(
            ID='category-{0}'.format(level.name.capitalize()),
            Parameter_ID='category',
            Name=level.name.capitalize()))
    for el in sorted(api.language_types.values()):
        data['CodeTable'].append(dict(
            ID='category-{0}'.format(el.category.replace(' ', '_')),
            Parameter_ID='category',
            Name=el.category))
    for el in sorted(api.aes_status.values()):
        data['CodeTable'].append(dict(
            ID='aes-{0}'.format(el.name.replace(' ', '_')),
            Parameter_ID='aes',
            Name=el.name,
            numerical_value=el.ordinal))
    for el in sorted(api.med_types.values()):
        data['CodeTable'].append(dict(
            ID='med-{0}'.format(el.id),
            Parameter_ID='med',
            Name=el.name,
            Description=el.description,
            numerical_value=el.rank))
    languoids = collections.OrderedDict((l.id, l) for l in api.languoids())
    refs_by_languoid, refs = api.refs_by_languoid(languoids)

    def get_language_id(l):
        if l.level == api.languoid_levels.dialect:
            for _, lid, _ in reversed(l.lineage):
                if languoids[lid].level == api.languoid_levels.language:
                    return lid

    def format_ref(ref):
        return '{0}[{1}]'.format(ref.key, ref.pages.replace(';', ',')) if ref.pages else ref.key

    for l in languoids.values():
        data['LanguageTable'].append(dict(
            ID=l.id,
            Name=l.name,
            Glottocode=l.id,
            ISO639P3code=l.iso,
            Latitude=l.latitude,
            Longitude=l.longitude,
            Macroarea=[ma.name for ma in l.macroareas],
            Countries=[c.id for c in l.countries],
            Family_ID=l.lineage[0][1] if l.lineage else None,
            Language_ID=get_language_id(l),
        ))
        med = sorted(refs_by_languoid[l.id], reverse=True)[0] if l.id in refs_by_languoid else None
        if med:
            ds.add_sources(Source(med.type, med.id, _check_id=False, **med.fields))
        clf = l.classification_comment
        if clf:
            for ref in clf.merged_refs('family') + clf.merged_refs('sub'):
                if ref.key not in refs:
                    log.warning('missing reference in classification comment: {0}'.format(ref))
                    continue
                e = refs[ref.key]
                ds.add_sources(Source(e.type, ref.key, _check_id=False, **e.fields))

        aes_src = l.endangerment.source.reference_id if l.endangerment else None
        if aes_src:
            e = refs[aes_src]
            ds.add_sources(Source(e.type, aes_src, _check_id=False, **e.fields))

        data['ValueTable'].extend([
            value(
                l.id,
                'level',
                l.level.name,
                Code_ID='level-{0}'.format(l.level.name)),
            value(l.id, 'category', l.category.replace(' ', '_')),
            value(
                l.id,
                'classification',
                '/'.join(lin[1] for lin in l.lineage),
                Source=[format_ref(ref) for ref in clf.merged_refs('family')] if clf else [],
                Comment=clf.family if clf else None,
            ),
            value(
                l.id,
                'subclassification',
                l.newick_node(nodes=languoids, template="{l.id}").newick,
                Source=[format_ref(ref) for ref in clf.merged_refs('sub')] if clf else [],
                Comment=clf.sub if clf else None,
            ),
            value(
                l.id,
                'aes',
                l.endangerment.status.name if l.endangerment else None,
                Comment=l.endangerment.comment if l.endangerment else None,
                Source=[aes_src] if aes_src else [],
                Code_ID='aes-{0}'.format(
                    l.endangerment.status.name.replace(' ', '_')) if l.endangerment else None),
            value(
                l.id,
                'med',
                med.med_type.name if med else None,
                Source=[med.id] if med else [],
                Code_ID='med-{0}'.format(med.med_type.id) if med else None),
        ])

    ds.write(outdir / 'cldf-metadata.json', **data)
    ds.validate(log=log)
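
A minimal sketch of driving this export, assuming api is a pyglottolog.Glottolog instance (which provides the languoids(), languoid_levels, aes_status and med_types attributes used above) and that the repos() and value() helpers come from the original module (paths hypothetical):

import logging
from pathlib import Path

from pyglottolog import Glottolog

logging.basicConfig(level=logging.INFO)
api = Glottolog('glottolog')  # hypothetical path to a glottolog data clone
cldf(api, Path('glottolog-cldf'), logging.getLogger('glottolog-cldf'))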