def from_txt(cls, txt, session=None, **kw):
    """Build a doculect with its wordlist, synsets and words from text.

    The lines kept by ``nfilter`` (presumably the non-empty ones; confirm
    against its definition) are read positionally:

    1. a language line matched by ``LANGUAGE_LINE_PATTERN``; its ``name``
       group becomes the id, and its ``w``/``e``/``g`` groups become the
       WALS, Ethnologue and Glottolog classifications,
    2. a metadata line handed to ``parse_metadata``,
    3. any number of further lines, of which only those containing a tab
       are passed to ``parse_word``; the rest are skipped.

    :param txt: Newline-separated text of the word list.
    :param session: Object whose ``query(Parameter)`` yields the known \
    parameters; ``DBSession`` is used when this is omitted or falsy.
    :param kw: Extra keyword arguments for ``cls``. ``id``, ``name``, the \
    ``classification_*`` keys and finally the parsed metadata are written \
    on top of them, in that order.
    :return: The newly created ``cls`` instance.
    :raises AssertionError: If the first line is not a valid language line.
    :raises IndexError: If fewer than two lines are available (assuming \
    ``nfilter`` returns a list).
    :raises KeyError: If a word line names an id with no matching \
    ``Parameter``.
    """
    session = session or DBSession
    lines = nfilter(txt.split('\n'))

    m = LANGUAGE_LINE_PATTERN.match(lines[0])
    if not m:
        # Raised explicitly rather than via ``assert`` so the check is still
        # performed under ``python -O``; the exception type is kept so that
        # existing callers see the same error as before.
        raise AssertionError('invalid language line: {0!r}'.format(lines[0]))

    kw['id'] = m.group('name')
    # "some_language_name" -> "Some Language Name"
    kw['name'] = ' '.join(s.capitalize() for s in kw['id'].split('_'))

    # The pattern's groups are named after the first letter of each catalog.
    for cname in ['wals', 'ethnologue', 'glottolog']:
        classification = m.group(cname[0])
        if classification:
            kw['classification_' + cname] = classification

    # Metadata is applied last, so it overrides any key set above.
    kw.update(parse_metadata(lines[1]))
    doculect = cls(**kw)

    # The family is the first component of a comma-separated classification.
    if doculect.classification_ethnologue:
        doculect.ethnologue_family = \
            doculect.classification_ethnologue.split(',')[0]
    if doculect.classification_glottolog:
        doculect.glottolog_family = \
            doculect.classification_glottolog.split(',')[0]

    doculect.wordlist = Contribution(
        id=kw['id'], language=doculect, name=doculect.id)

    parameters = {p.id: p for p in session.query(Parameter)}

    # NOTE: every meaning is imported; no filtering down to the core
    # meanings (MEANINGS_ALL) is applied here.
    for line in lines[2:]:
        if '\t' not in line:
            continue
        wid, words, comment = parse_word(line)
        vsid = '%s-%s' % (doculect.id, wid)
        vs = Synset(
            id=vsid,
            description=comment,
            language=doculect,
            contribution=doculect.wordlist,
            parameter=parameters[wid])
        for i, (form, loan) in enumerate(words, start=1):
            # The Word is not kept in a local; it is handed ``valueset=vs``
            # and is presumably reachable through that relationship.
            Word(id='%s-%s' % (vsid, i), name=form, valueset=vs, loan=loan)

    return doculect
def import_values(values, lang, features, codes, contributors, sources):  # pragma: no cover
    """Create the dataset contribution of one language along with its values.

    :param values: Iterable of mappings with the keys ``ID``, \
    ``Parameter_ID``, ``Value``, ``Comment``, ``Code_ID`` and ``Source``.
    :param lang: Mapping with the keys ``ID``, ``Name``, ``Coders``, \
    ``Macroarea``, ``Latitude`` and ``Longitude``.
    :param features: Maps a ``Parameter_ID`` to the value used as \
    ``parameter_pk``.
    :param codes: Maps a code id to the value used as ``domainelement_pk``.
    :param contributors: Maps a coder id to the value used as \
    ``contributor_pk``.
    :param sources: Maps a source id to the value used as ``source_pk``.
    :return: ``None``.
    """
    dataset = Contribution(
        id=lang['ID'], name='Dataset for {0}'.format(lang['Name']))

    # Coders are linked in the order they are listed, counting from 1.
    for position, coder_id in enumerate(lang['Coders'], start=1):
        link = ContributionContributor(
            contribution=dataset,
            contributor_pk=contributors[coder_id],
            ord=position,
        )
        DBSession.add(link)

    language = GrambankLanguage(
        id=lang['ID'],
        name=lang['Name'],
        macroarea=lang['Macroarea'],
        latitude=lang['Latitude'],
        longitude=lang['Longitude'],
    )

    for row in values:
        valueset = ValueSet(
            id=row['ID'],
            parameter_pk=features[row['Parameter_ID']],
            language=language,
            contribution=dataset,
        )
        # A row without a code id falls back to the "<Parameter_ID>-NA" code.
        Value(
            id=row['ID'],
            valueset=valueset,
            name=row['Value'],
            description=row['Comment'],
            domainelement_pk=codes[
                row['Code_ID'] or '{}-NA'.format(row['Parameter_ID'])],
        )
        # A falsy ``Source`` yields no references at all.
        for ref in row['Source'] or ():
            source_id, pages = Sources.parse(ref)
            ValueSetReference(
                valueset=valueset,
                source_pk=sources[source_id],
                description=pages,
            )

    # Only the contribution and its contributor links are added explicitly;
    # the other objects are presumably picked up through their relationships.
    DBSession.add(dataset)