示例#1
0
    def cmd_makecldf(self, args):
        """Feed the raw DravLex data into the CLDF writer.

        Concepts, sources, languages, cognate judgements and forms are read
        from ``raw/Verkerk-DravLex-622ac6e`` below ``self.dir`` and passed to
        ``args.writer``.

        :param args: command arguments; only ``args.writer`` is used here.
        """
        dsdir = self.dir / 'raw' / 'Verkerk-DravLex-622ac6e'
        # NOTE(review): the loaded wordlist is not referenced again in this method.
        dataset = Wordlist.from_metadata(dsdir / 'Wordlist-metadata.json')

        # load concepts; ``concepts`` is later indexed with the raw Parameter_ID
        concepts = args.writer.add_concepts(
            id_factory=lambda c: c.id.split('-')[-1] + '_' + slug(c.english),
            lookup_factory="Name"
        )

        # load sources from original CLDF, and then the fieldwork source
        args.writer.add_sources(*self.raw_dir.read_bib(dsdir / 'sources.bib'))
        args.writer.add_sources()

        # load languages
        args.writer.add_languages()

        # load cognates, keyed by the form they belong to; if a form has
        # several cognate rows, the last one read wins
        cogs = {
            r['Form_ID']: r
            for r in self.raw_dir.read_csv(dsdir / 'cognates.csv', dicts=True)
        }

        # load data
        for row in self.raw_dir.read_csv(dsdir / 'forms.csv', dicts=True):
            # forms without an explicit source are attributed to 'KolipakamFW'
            src = row['Source'].split(";") if row['Source'] else ['KolipakamFW']
            cog = cogs.get(row['ID'])
            for lex in args.writer.add_forms_from_value(
                Local_ID=row['ID'],
                Language_ID=row['Language_ID'],
                Parameter_ID=concepts[row['Parameter_ID']],
                Value=row['Form'],
                Source=src,
                Comment=row['status'],
                # any non-empty status marks the form as a loan
                Loan=bool(row['status'])
            ):
                if cog is None:
                    # No cognate judgement for this form: keep the form but
                    # skip the cognate instead of failing on ``None['ID']``.
                    continue
                args.writer.add_cognate(
                    lexeme=lex,
                    ID=cog['ID'],
                    Source=cog['Source'],
                    Cognateset_ID=cog['Cognateset_ID'],
                    Comment=", ".join([cog['Comment'], cog['source_comment']])
                )
示例#2
0
from xml.etree.ElementTree import fromstring
from xmljson import badgerfish as bf
import sys
import os
import csv
from cariban import util
from pycldf import Wordlist
import re
import pyperclip
# Build a lookup of all forms in the dataset described by ../cariban_data.json,
# keyed by form ID.
lexicon = {}
cariban_data = Wordlist.from_metadata("../cariban_data.json")
for row in cariban_data["FormTable"]:
    alt_glossings = row["Glossing"].split("; ")
    # str.split with an explicit separator never returns an empty list, so an
    # empty first item is the only sign that no glossing was given.
    if alt_glossings[0] == "":
        # NOTE(review): this branch stores the raw Parameter_ID value, while
        # the other branch stores a list of glossing strings.
        meanings = row["Parameter_ID"]
    else:
        meanings = alt_glossings
    lexicon[row["ID"]] = {
        "forms": row["Form"],
        "meanings": meanings,
        "language": row["Language_ID"],
    }
# print(lexicon)


def search_lexicon(form, meaning, language):
    if len(lexicon) == 0:
        return ("X")
    if not meaning.isupper():
        new_meaning = meaning.replace(".", " ")
    else:
示例#3
0
from clldutils.path import Path
from clldutils.misc import slug
from pycldf import Wordlist
from clld_phylogeny_plugin.models import Phylogeny, TreeLabel, LanguageTreeLabel
from clld_cognacy_plugin.models import Cognate, Cognateset
from csvw.dsv import reader


import cobl2
from cobl2 import models
import clld_cognacy_plugin.models


# Directory reached by going two levels up from the ``cobl2`` package directory;
# both the ``iecor`` data and the ``CoBL-public`` checkout are looked up there.
_base_dir = Path(cobl2.__file__).parent / '../..'

data_file_path = _base_dir / 'iecor'

# CLDF wordlist described by the metadata file inside the ``iecor`` directory.
ds = Wordlist.from_metadata(data_file_path / 'cldf' / 'cldf-metadata.json')

# Map the file stem of each contributor ``.jpg`` to the POSIX form of its path.
_contributors_dir = _base_dir / 'CoBL-public' / 'cobl' / 'static' / 'contributors'
photos = {
    p.stem: p.as_posix()
    for p in _contributors_dir.iterdir()
    if p.suffix == '.jpg'
}

# Make some photos additionally reachable under a second key; the right-hand
# name must already be present in ``photos`` or a KeyError is raised.
for k, v in (
    ('Kümmel', 'Kuemmel'),
    ('de Vaan', 'deVaan'),
    ('Dewey-Findell', 'Dewey'),
):
    photos[k] = photos[v]


def main(args):
    data = Data()
示例#4
0
文件: clld.py 项目: liualg/pylexibank
 def original_cldf(self):
     """Load the wordlist for the first metadata file found in ``self.raw_dir``.

     A directory entry counts as a metadata file when its name ends with
     ``MD_SUFFIX``. Returns ``None`` when no entry matches.
     """
     metadata_path = next(
         (entry for entry in self.raw_dir.iterdir() if entry.name.endswith(MD_SUFFIX)),
         None,
     )
     if metadata_path is None:
         return None
     return Wordlist.from_metadata(metadata_path)
示例#5
0
def main(args):  # pragma: no cover
    """Populate the database from the Parabank CLDF wordlist.

    Creates the single contributor/editor and the dataset record, then adds
    one language, parameter and word object per row of the wordlist's
    LanguageTable, ParameterTable and FormTable. Forms are grouped into value
    sets keyed by ``<Parameter_ID>-<Language_ID>``. Finally ``load_families``
    is called for all loaded languages.

    :param args: object providing ``data_file(*parts)`` to locate input files.
    """
    wordlist = Wordlist.from_metadata(args.data_file('cldf', 'cldf-metadata.json'))

    data = Data()
    data.add(
        common.Contributor,
        'barthwolfgang',
        id='barthwolfgang',
        name="Wolfgang Barth",
        url="http://www.dynamicsoflanguage.edu.au/",
    )

    #
    # FIXME: get dataset attributes from CLDF metadata!
    #
    license_info = {
        'license_icon': 'cc-by.png',
        'license_name': 'Creative Commons Attribution 4.0',
    }
    dataset = common.Dataset(
        id='parabank',
        name='Parabank Pronouns',
        description='Database of pronouns',
        domain='parabank.clld.org',
        publisher_name="CoEDL Centre of Excellence for the Dynamics of Language",
        publisher_place="Canberra, Australia",
        publisher_url="http://www.dynamicsoflanguage.edu.au/",
        license='http://creativecommons.org/licenses/by/4.0/',
        contact='*****@*****.**',
        jsondata=license_info,
    )
    DBSession.add(dataset)

    # Editors are numbered from 1 in list order.
    for position, editor_id in enumerate(['barthwolfgang'], start=1):
        common.Editor(
            dataset=dataset,
            contributor=data['Contributor'][editor_id],
            ord=position,
        )

    # Every value set created below is attached to this one contribution.
    contrib = common.Contribution(id='contrib', name='the contribution')

    for lang_row in wordlist['LanguageTable']:
        language = data.add(
            models.ParabankLanguage,
            lang_row['ID'],
            id=lang_row['ID'],
            name=lang_row['Name'],
            description=lang_row['Notes'],
            source=lang_row['Source_Citation'],
            classification=lang_row['Classification'],
        )
        add_language_codes(data, language, None, glottocode=lang_row['Glottocode'])

    for param_row in wordlist['ParameterTable']:
        # The parameter's display name carries its ID in parentheses.
        display_name = '{0} ({1})'.format(param_row['Name'], param_row['ID'])
        data.add(
            common.Parameter,
            param_row['ID'],
            id=param_row['ID'],
            name=display_name,
        )

    for form_row in wordlist['FormTable']:
        parameter_id = form_row['Parameter_ID']
        language_id = form_row['Language_ID']
        vsid = '{0}-{1}'.format(parameter_id, language_id)

        # Reuse the value set for this parameter/language pair if one exists.
        valueset = data['ValueSet'].get(vsid)
        if not valueset:
            valueset = data.add(
                common.ValueSet,
                vsid,
                id=vsid,
                language=data['ParabankLanguage'][language_id],
                parameter=data['Parameter'][parameter_id],
                contribution=contrib,
            )

        word = models.Word(
            id=form_row['ID'],
            name=form_row['Form'],
            comment=form_row.get('Comment'),
            original=form_row['Original_parameter'],
            valueset=valueset,
        )
        DBSession.add(word)

    languages_by_glottocode = [
        (language.glottocode, language)
        for language in data['ParabankLanguage'].values()
    ]
    load_families(
        data,
        languages_by_glottocode,
        glottolog_repos=args.data_file('glottolog'),
        isolates_icon='tcccccc',
    )