示例#1
0
    def from_txt(cls, txt, session=None, **kw):
        """Build a doculect, its word list and its synsets from text.

        The text is split on newlines and passed through ``nfilter``
        (presumably dropping empty lines -- confirm against its definition).
        The remaining lines are interpreted as follows:

        * line 1 is the language header and must match
          ``LANGUAGE_LINE_PATTERN``.  Its ``name`` group becomes the id, and
          the display name is derived from the id by capitalizing its
          ``_``-separated parts.  The groups ``w``, ``e`` and ``g``, when
          non-empty, become ``classification_wals``,
          ``classification_ethnologue`` and ``classification_glottolog``.
        * line 2 is handed to ``parse_metadata``; the result is merged into
          the constructor keywords and overrides existing keys.
        * every later line containing a tab is handed to ``parse_word``,
          which yields ``(wid, words, comment)``; lines without a tab are
          skipped.

        :param txt: The complete text of one word-list file.
        :param session: Session used to query ``Parameter`` objects; falls \
        back to ``DBSession`` when falsy.
        :param kw: Extra keyword arguments forwarded to ``cls(...)``.
        :return: The new doculect.  This method does not add it to the \
        session itself.
        :raises AssertionError: If the first line does not match \
        ``LANGUAGE_LINE_PATTERN``.
        :raises KeyError: If a word line refers to a meaning id for which no \
        ``Parameter`` was found.
        """
        session = session or DBSession

        lines = nfilter(txt.split('\n'))
        m = LANGUAGE_LINE_PATTERN.match(lines[0])
        if not m:
            # Raised explicitly instead of via ``assert`` so the check is not
            # stripped under ``python -O``.  AssertionError is kept so callers
            # see the same exception type as before.
            raise AssertionError(
                'first line does not match LANGUAGE_LINE_PATTERN: %r'
                % (lines[0],))
        kw['id'] = m.group('name')
        kw['name'] = ' '.join(s.capitalize() for s in kw['id'].split('_'))
        for cname in ['wals', 'ethnologue', 'glottolog']:
            # The pattern's group names are the initial letters w / e / g.
            if m.group(cname[0]):
                kw['classification_' + cname] = m.group(cname[0])

        kw.update(parse_metadata(lines[1]))
        doculect = cls(**kw)

        # The family is the first comma-separated component of the
        # classification string.
        if doculect.classification_ethnologue:
            doculect.ethnologue_family = doculect.classification_ethnologue.split(
                ',')[0]

        if doculect.classification_glottolog:
            doculect.glottolog_family = doculect.classification_glottolog.split(
                ',')[0]

        doculect.wordlist = Contribution(id=kw['id'],
                                         language=doculect,
                                         name=doculect.id)

        # Load all parameters once so each word line is a dict lookup.
        parameters = {p.id: p for p in session.query(Parameter)}

        for line in lines[2:]:
            if '\t' not in line:
                continue

            wid, words, comment = parse_word(line)
            # NOTE: a filter that skipped meanings not in MEANINGS_ALL
            # ("drop non-core meanings") used to be commented out here; every
            # parsed meaning is imported.
            vsid = '%s-%s' % (doculect.id, wid)
            vs = Synset(id=vsid,
                        description=comment,
                        language=doculect,
                        contribution=doculect.wordlist,
                        parameter=parameters[wid])

            # Each entry of ``words`` is a (form, loan) pair; word ids are
            # numbered from 1 within the synset.
            for i, (form, loan) in enumerate(words, start=1):
                # The instance is not kept in a local; presumably the
                # ``valueset`` relationship attaches it to ``vs`` -- confirm
                # against the model definition.
                Word(id='%s-%s' % (vsid, i), name=form, valueset=vs, loan=loan)

        return doculect
示例#2
0
文件: util.py 项目: HedvigS/grambank
def import_values(values, lang, features, codes, contributors,
                  sources):  # pragma: no cover
    """Create the database objects for one language's dataset.

    Builds a ``Contribution`` and a ``GrambankLanguage`` from ``lang``, links
    the coders to the contribution, and creates one ``ValueSet`` / ``Value``
    pair (plus ``ValueSetReference`` objects for any sources) per entry of
    ``values``.  Only the contributor links and the contribution are passed to
    ``DBSession.add`` here; the other objects are presumably persisted through
    their relationships -- confirm against the model definitions.

    :param values: Iterable of rows indexed by ``'ID'``, ``'Parameter_ID'``, \
    ``'Value'``, ``'Comment'``, ``'Code_ID'`` and ``'Source'``.
    :param lang: Row indexed by ``'ID'``, ``'Name'``, ``'Coders'``, \
    ``'Macroarea'``, ``'Latitude'`` and ``'Longitude'``.
    :param features: Lookup from parameter id to the value used as \
    ``parameter_pk``.
    :param codes: Lookup from code id to the value used as \
    ``domainelement_pk``.
    :param contributors: Lookup from coder id to the value used as \
    ``contributor_pk``.
    :param sources: Lookup from source id to the value used as ``source_pk``.
    """
    dataset = Contribution(
        id=lang['ID'],
        name='Dataset for {0}'.format(lang['Name']),
    )

    # Coders are ranked in the order they are listed, starting at 1.
    for position, coder_id in enumerate(lang['Coders'], 1):
        link = ContributionContributor(
            contribution=dataset,
            contributor_pk=contributors[coder_id],
            ord=position,
        )
        DBSession.add(link)

    language = GrambankLanguage(
        id=lang['ID'],
        name=lang['Name'],
        macroarea=lang['Macroarea'],
        latitude=lang['Latitude'],
        longitude=lang['Longitude'],
    )

    for row in values:
        row_id = row['ID']
        valueset = ValueSet(
            id=row_id,
            parameter_pk=features[row['Parameter_ID']],
            language=language,
            contribution=dataset,
        )
        # A row without a code id falls back to the "<Parameter_ID>-NA" code.
        Value(
            id=row_id,
            valueset=valueset,
            name=row['Value'],
            description=row['Comment'],
            domainelement_pk=codes[
                row['Code_ID'] or '{}-NA'.format(row['Parameter_ID'])],
        )

        # An empty or missing source list yields no references.
        for ref in row['Source'] or ():
            source_id, pages = Sources.parse(ref)
            ValueSetReference(
                valueset=valueset,
                source_pk=sources[source_id],
                description=pages,
            )

    DBSession.add(dataset)