def testapp():
    """Yield a ``webtest.TestApp`` wrapping a minimal clld app.

    Configures the app against an in-memory SQLite database, creates the
    schema, and seeds it with one dataset plus a single
    language/parameter/valueset/value chain linked into a cognate set.
    """
    from webtest import TestApp
    from clld.db.meta import DBSession, VersionedDBSession, Base
    from clld.db.models import common
    from clld_cognacy_plugin.models import Cognateset, Cognate

    def build_wsgi_app():
        # In-memory database; template lookup covers clld core plus the
        # cognacy plugin's own templates.
        settings = {
            'sqlalchemy.url': 'sqlite://',
            'mako.directories': [
                'clld:web/templates',
                'clld_cognacy_plugin:templates',
            ],
        }
        configurator = config.Configurator(settings=settings)
        configurator.include('clld.web.app')
        configurator.include('clld_cognacy_plugin')
        return configurator.make_wsgi_app()

    # Start from clean scoped sessions, then bind and create the schema.
    DBSession.remove()
    VersionedDBSession.remove()
    app = build_wsgi_app()
    Base.metadata.bind = DBSession.bind
    Base.metadata.create_all()

    DBSession.add(common.Dataset(id='1', name='test app', domain='example.org'))
    cognateset = Cognateset(id='1', name='cs: test')
    language = common.Language(id='l', latitude=2, longitude=2)
    parameter = common.Parameter(id='l')
    valueset = common.ValueSet(id='vs', language=language, parameter=parameter)
    counterpart = common.Value(id='v', name='abc', valueset=valueset)
    DBSession.add(Cognate(cognateset=cognateset, counterpart=counterpart))

    yield TestApp(app)
def setUp(self):
    """Populate the test database with one instance of each core model.

    Creates a dataset, a contribution with four contributors, 101
    languages with ISO 639-3 identifiers, a parameter with a two-element
    domain and one without, valuesets/values, units and unit values,
    sources, a fully glossed sentence and a config entry, then flushes
    the session so everything gets primary keys.
    """
    TestWithDb.setUp(self)

    DBSession.add(
        common.Dataset(id='dataset', name='dataset', description='desc', domain='clld'))
    source = common.Source(id='source')

    # Replace the plain name strings with Contributor instances in place.
    contributors = {'contributor': 'A Name', 'b': 'b Name', 'c': 'c Name', 'd': 'd Name'}
    for id_, name in contributors.items():
        contributors[id_] = common.Contributor(id=id_, name=name)

    contribution = common.Contribution(id='contribution', name='Contribution')
    cr = common.ContributionReference(contribution=contribution, source=source)
    # The association objects attach themselves to `contribution`
    # (presumably via relationship backrefs — the constructors are not
    # visible here); the asserts merely guard against a None result.
    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['contributor'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['b'])
    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['c'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['d'])
    DBSession.add(contribution)

    language = common.Language(id='language', name='Language 1', latitude=10.5, longitude=0.3)
    language.sources.append(source)
    identifier = common.Identifier(type='iso639-3', id='iso')
    li = common.LanguageIdentifier(language=language, identifier=identifier)

    # 100 more languages, each with a zero-padded fake ISO code ('002'…'101').
    for i in range(2, 102):
        _l = common.Language(id='l%s' % i, name='Language %s' % i)
        _i = common.Identifier(type='iso639-3', id='%.3i' % i, name='%.3i' % i)
        _li = common.LanguageIdentifier(language=_l, identifier=_i)
        DBSession.add(_l)

    # Parameter with a two-element domain; one value picks domain element `de`.
    param = common.Parameter(id='parameter', name='Parameter')
    de = common.DomainElement(id='de', name='DomainElement', parameter=param)
    de2 = common.DomainElement(id='de2', name='DomainElement2', parameter=param)
    valueset = common.ValueSet(
        id='valueset', language=language, parameter=param, contribution=contribution)
    value = common.Value(
        id='value', domainelement=de, valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)

    # Parameter without a domain, with a sourced valueset.
    paramnd = common.Parameter(id='no-domain', name='Parameter without domain')
    valueset = common.ValueSet(
        id='vs2', language=language, parameter=paramnd, contribution=contribution)
    vr = common.ValueSetReference(valueset=valueset, source=source)
    value = common.Value(id='v2', valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)

    # Units: one unit parameter without and one with a domain element.
    unit = common.Unit(id='unit', name='Unit', language=language)
    up = common.UnitParameter(id='unitparameter', name='UnitParameter')
    DBSession.add(unit)
    DBSession.add(
        common.UnitValue(id='unitvalue', name='UnitValue', unit=unit, unitparameter=up))
    up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
    de = common.UnitDomainElement(id='de', name='de', parameter=up2)
    DBSession.add(
        common.UnitValue(
            id='uv2', name='UnitValue2', unit=unit, unitparameter=up2,
            unitdomainelement=de))

    DBSession.add(common.Source(id='s'))

    # IGT sentence: analyzed/gloss are tab-aligned morpheme-by-morpheme.
    sentence = common.Sentence(
        id='sentence', name='sentence name', description='sentence description',
        analyzed='a\tmorpheme\tdoes\tdo', gloss='a\tmorpheme\t1SG\tdo.SG2',
        source='own', comment='comment', original_script='a morpheme',
        language=language)
    sr = common.SentenceReference(sentence=sentence, source=source)
    DBSession.add(common.Config(key='key', value='value'))
    DBSession.flush()
def add_cultural_data(questionnaire_file_name, parameters, language):
    """Parse the typological questionnaire into the database.

    :param questionnaire_file_name: file name of the questionnaire,
        resolved relative to ``DBPATH``.
    :param parameters: mapping of parameter id -> (Parameter, domain dict);
        updated in place as new parameters/domain elements are encountered,
        so repeated calls share parameters across languages.
    :param language: the ``common.Language`` the answers belong to.
    """
    contribution_text, parameter_descriptions, answers = parse_culture_questionnaire(
        os.path.join(DBPATH, questionnaire_file_name))

    # All ValueSets must be related to a contribution, so generate one
    # from the metadata.
    contrib = common.Contribution(
        id='contrib' + newid(), name=contribution_text + newid())

    for p, parameter in parameter_descriptions.iterrows():
        # First, make sure that this parameter exists – either look it up
        # or create it.
        pid = p.replace(".", "-")
        try:
            param, domain = parameters[pid]
        except KeyError:
            param = common.Parameter(
                id='culture' + pid,
                name=p,
                description=parameter['Question_text_English'],
                markup_description=parameter['Question_text_English'])
            domain = {}
            parameters[pid] = (param, domain)

        # Secondly, check whether we are aware that this answer is valid
        # already – otherwise we add its value to the domain, and use that.
        # Note: Once we have a database, we can do better filtering and
        # constraining, and don't need to rely on reasonable data.
        answer = str(answers["Answer"][p])
        try:
            domain_element = domain[slug(answer)]
        except KeyError:
            # Derive a numeric code: plain integers pass through, Y/True
            # and N/False become 1/0, anything else has no number.
            try:
                numerical_value = int(answer)
            except ValueError:
                numerical_value = (
                    1 if answer == "Y" or answer == 'True'
                    else 0 if answer == "N" or answer == 'False'
                    else None)
            domain_element = common.DomainElement(
                id=param.id + slug(answer),
                description=answer,
                number=numerical_value,
                name=answer,
                parameter=param,
                abbr=answer,
                jsondata={'color': color(numerical_value)})
            DBSession.add(domain_element)
            # BUGFIX: this was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Keep the deliberate
            # best-effort diagnostic, but only for real flush failures.
            try:
                DBSession.flush()
            except Exception:
                print(domain, domain_element, language.name, pid, param.name)
            domain[slug(answer)] = domain_element

        # Now create the ValueSet, representing all values the language
        # has for this parameter
        vs = common.ValueSet(
            id='vs' + newid(),
            language=language,
            parameter=param,
            jsondata=domain_element.jsondata,
            contribution=contrib)
        # and fill in the actual values, which in this case is only one.
        # This object, and all objects it depends on, are then scheduled
        # for writing into the database.
        DBSession.add(
            common.Value(
                id='v' + newid(),
                valueset=vs,
                frequency=float(100),
                jsondata=domain_element.jsondata,
                domainelement=domain_element))
    # Execute all scheduled database updates.
    DBSession.flush()
def update(args):
    """Import UNESCO 'Atlas of the World's Languages in Danger' data.

    Ensures the 'unesco' contribution and 'vitality' parameter (with its
    domain built from ``VITALITY_VALUES``) exist, then walks the XML data
    file, matches records to languoids by ISO 639-3 code, stores the raw
    record in each languoid's jsondata, and creates or updates one
    vitality ValueSet per matched language.

    :param args: CLI helper providing ``args.data_file(...)`` paths.
    """
    pid, cid = 'vitality', 'unesco'
    count = 0
    notfound = {}

    contrib = common.Contribution.get(cid, default=None)
    if not contrib:
        contrib = common.Contribution(
            id=cid,
            name='Atlas of the World’s Languages in Danger',
            description='Atlas of the World’s Languages in Danger, © UNESCO, http://www.unesco.org/culture/languages-atlas')
    param = common.Parameter.get(pid, default=None)
    if param is None:
        param = common.Parameter(id=pid, name='Degree of endangerment')

    # Complete the domain; element numbers follow VITALITY_VALUES order.
    domain = {de.name: de for de in param.domain}
    for i, spec in enumerate(VITALITY_VALUES):
        name, desc = spec
        if name not in domain:
            number = i + 1
            domain[name] = common.DomainElement(
                id='%s-%s' % (pid, number), name=name, description=desc,
                number=number, parameter=param)

    valuesets = {vs.id: vs for vs in param.valuesets}
    for record in et.parse(args.data_file(DATA_FILE)).findall('.//RECORD'):
        item = {}
        # XML tags use '_' where the attribute names use spaces.
        for attr in [
                'ID', 'Name in English', 'Name in French', 'Name in Spanish',
                'Countries', 'Country codes alpha 3', 'ISO639-3 codes',
                'Degree of endangerment']:
            item[attr] = record.find(attr.replace(' ', '_')).text
        if item['ISO639-3 codes']:
            # One record may list several comma-separated ISO codes.
            for code in item['ISO639-3 codes'].split(','):
                code = code.strip()
                lang = Languoid.get(code, key='hid', default=None)
                if lang:
                    count += 1
                    item['url'] = 'http://www.unesco.org/culture/languages-atlas/en/atlasmap/language-iso-%s.html' % code
                    lang.update_jsondata(unesco=item)
                    de = domain[item['Degree of endangerment']]
                    vsid = '%s-%s' % (pid, lang.id)
                    vs = valuesets.get(vsid)
                    if not vs:
                        vs = common.ValueSet(
                            id='vitality-%s' % lang.id,
                            parameter=param,
                            contribution=contrib,
                            language=lang)
                        DBSession.add(
                            common.Value(valueset=vs, name=de.name, domainelement=de))
                        valuesets[vsid] = vs
                    else:
                        # Existing valueset: just repoint its single value.
                        vs.values[0].domainelement = de
                else:
                    notfound[code] = 1

    # BUGFIX: these were Python 2 print statements, a SyntaxError on
    # Python 3 and inconsistent with the print() calls used elsewhere.
    print('assigned', count, 'unesco urls')
    print('missing iso codes:', notfound)
def load():
    """Load the TDIR (intensifiers/reflexives) data into the database.

    Reads languages, glosses, references and examples via ``read(...)``
    (source not visible here), pulls coordinates from a local WALS
    Postgres database, and creates the corresponding clld objects.
    NOTE(review): this code is Python-2 only (uses ``unicode``).
    """
    # Direct connection to a local WALS database for coordinates.
    wals = create_engine('postgresql://robert@/wals3')

    contributor = common.Contributor(id='gastvolker', name='Volker Gast')
    contribution = common.Contribution(
        id='tdir', name='Typological Database of Intensifiers and Reflexives')
    cc = common.ContributionContributor(
        contribution=contribution, contributor=contributor)
    DBSession.add(cc)

    for row in read('glosses'):
        DBSession.add(common.GlossAbbreviation(id=row['gloss'], name=row['explanation']))

    params = {}
    for id_, name in PARAMS.items():
        params[id_] = common.Parameter(id='tdir-' + id_, name=name)
        DBSession.add(params[id_])

    #
    # TODO: domain for sortal restrictions!
    #
    values = {}
    languages = {}
    for row in read('languages'):
        # 'adn' may carry extra '<br>'-separated intensifiers; move the
        # surplus into 'otherint'.
        if row['adn'] and '<br>' in row['adn']:
            row['adn'], other = row['adn'].split('<br>', 1)
            if not row['otherint']:
                row['otherint'] = ''
            row['otherint'] = '\n'.join(
                filter(None, row['otherint'].split('<br>') + other.split('<br>')))

        # Normalize SIL codes; the mapping fixes known-bad codes in the
        # source data (presumably typos — TODO confirm against ISO 639-3).
        row['sil'] = row['sil'].lower()
        row['sil'] = {
            'arm': 'hye',
            'vmn': 'mig',
            'gli': 'gle',
            'grk': 'ell',
            'hbr': 'heb',
            'ltn': 'lat',
            'chn': 'cmn',
            'ota': 'ote',
            'pnj': 'pan',
            'pba': 'rap',
            'esg': 'kal',
            'vla': 'zea',
            'lat': 'lav',
        }.get(row['sil'], row['sil'])

        l = common.Language(id=row['sil'].lower(), name=row['language'])
        languages[row['language']] = l

        # Look up coordinates in WALS by ISO code, falling back to name.
        # NOTE(review): SQL built by string interpolation — safe only as
        # long as the input spreadsheets are trusted; parameterize otherwise.
        res = wals.execute(
            "select l.latitude, l.longitude from language as l, languageidentifier as li, identifier as i where l.pk = li.language_pk and li.identifier_pk = i.pk and i.id = '%s' and i.type = 'iso639-3';"
            % row['sil']).fetchone()
        if not res:
            res = wals.execute(
                "select latitude, longitude from language where name = '%s';"
                % row['language']).fetchone()
        if res:
            l.latitude, l.longitude = res
        else:
            print(row['language'], row['sil'])
            #(u'Classical Nahuatl', u'nci') ???
            #(u'Ancient Greek', u'gko')

        # One Value per parameter column that is filled in for this
        # language; kept in `values` for linking examples later.
        for pid in params.keys():
            value = row[pid]
            if value:
                value = common.Value(
                    id='tdir-%s-%s' % (pid, l.id),
                    name=unicode(bs(value)),
                    contribution=contribution,
                    parameter=params[pid],
                    language=l)
                values['%s-%s' % (pid, row['language'])] = value
                DBSession.add(value)

    def normalize_ref(ref):
        # Collapse whitespace and turn <i>…</i> markup into plain quotes.
        ref = re.sub('\s+', ' ', ref).strip()
        return unicode(bs(ref)).replace('<i>', '"').replace('</i>', '"')

    """
    Ogawa, A. (1998)
    Wali, K. et al. (2000)
    Lyutikova. -> Lyutikova,
    se-Bertit -> se-Berit

    missing refs:
    Sengupta, G. (2000). Lexical anaphors and pronouns in Bangla. In Lust et al. (eds.), <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>. Berlin: Mouton de Gruyter.
    Davison, A. Mistry (2000). Lexical anaphors and pronouns in Hindi/Urdu. In Lust et al. (eds.), <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>. Berlin: Mouton de Gruyter.
    """
    refs = {}
    for row in read('references'):
        # Key references by 'Author (Year)' — everything up to the first ').'
        name = re.sub('\s+', ' ', row['entry'].split(').')[0].strip()) + ')'
        src = common.Source(
            id=row['ref'].strip(),
            name=name,
            description=normalize_ref(row['entry']))
        refs[name] = src
        DBSession.add(src)

    for row in read('examples'):
        if row['language'] not in languages:
            print('example for unknown language "%s"' % row['language'])
            continue
        s = common.Sentence(
            id=row['Nr'].strip(),
            name=fix_example(row['original'], repl=' '),
            language=languages[row['language']],
            analyzed=fix_example(row['original']),
            gloss=fix_example(row['gloss']),
            description=row['translation'],
            source=row['source'],
            comment=row['comments'])
        # Link the example to every reference whose short name occurs in
        # its free-text source field; report mismatching citations.
        has_refs = False
        for ref in refs:
            if ref in row['source']:
                if normalize_ref(row['source']) != refs[ref].description:
                    print('-->')
                    print(row['source'])
                has_refs = True
                common.SentenceReference(sentence=s, source=refs[ref])
        if not has_refs:
            print('+++++')
            print(row['source'])
        pid = EXAMPLE_MAP[row['pov']]
        if pid:
            # associate with value!
            o = common.ValueSentence(
                value=values['%s-%s' % (pid, row['language'])], sentence=s)
        DBSession.add(s)
def setUp(self):
    """Populate the test database with one instance of each core model.

    Extended variant: adds a dataset with a license icon, a deactivated
    'replaced' source, one identifier per ``IdentifierType``, a second
    value on the domain parameter, a sentence with an alternative
    translation, and Config-based replacement entries, then flushes.
    """
    TestWithDb.setUp(self)

    DBSession.add(
        common.Dataset(
            id='dataset', name='dataset', description='desc', domain='clld',
            jsondata={'license_icon': 'cc-by'}))
    # A deactivated source that points at its replacement via jsondata.
    DBSession.add(
        common.Source(
            id='replaced', active=False, jsondata={'__replacement_id__': 'source'}))
    source = common.Source(id='source')

    # Replace the plain name strings with Contributor instances in place.
    contributors = {'contributor': 'A Name', 'b': 'b Name', 'c': 'c Name', 'd': 'd Name'}
    for id_, name in contributors.items():
        contributors[id_] = common.Contributor(id=id_, name=name, url='http://example.org')

    contribution = common.Contribution(id='contribution', name='Contribution')
    common.ContributionReference(contribution=contribution, source=source)
    # The association objects attach themselves to `contribution`
    # (presumably via relationship backrefs — the constructors are not
    # visible here); the asserts merely guard against a None result.
    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['contributor'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['b'])
    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['c'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['d'])
    DBSession.add(contribution)

    language = common.Language(id='language', name='Language 1', latitude=10.5, longitude=0.3)
    language.sources.append(source)
    # One identifier per known identifier type.
    for i, type_ in enumerate(common.IdentifierType):
        id_ = common.Identifier(type=type_.value, id=type_.value + str(i), name='abc')
        common.LanguageIdentifier(language=language, identifier=id_)

    # 100 more languages, each with a zero-padded fake ISO code ('002'…'101').
    for i in range(2, 102):
        _l = common.Language(id='l%s' % i, name='Language %s' % i)
        _i = common.Identifier(type='iso639-3', id='%.3i' % i, name='%.3i' % i)
        common.LanguageIdentifier(language=_l, identifier=_i)
        DBSession.add(_l)

    # Parameter with a two-element domain and one value per domain element.
    param = common.Parameter(id='parameter', name='Parameter')
    de = common.DomainElement(id='de', name='DomainElement', parameter=param)
    de2 = common.DomainElement(id='de2', name='DomainElement2', parameter=param)
    valueset = common.ValueSet(
        id='valueset', language=language, parameter=param, contribution=contribution)
    value = common.Value(
        id='value', domainelement=de, valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)
    value2 = common.Value(
        id='value2', domainelement=de2, valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value2)

    # Parameter without a domain, with a sourced valueset.
    paramnd = common.Parameter(id='no-domain', name='Parameter without domain')
    valueset = common.ValueSet(
        id='vs2', language=language, parameter=paramnd, contribution=contribution)
    common.ValueSetReference(valueset=valueset, source=source)
    value = common.Value(id='v2', valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)

    # Units: one unit parameter without and one with a domain element.
    unit = common.Unit(id='unit', name='Unit', language=language)
    up = common.UnitParameter(id='unitparameter', name='UnitParameter')
    DBSession.add(unit)
    DBSession.add(
        common.UnitValue(id='unitvalue', name='UnitValue', unit=unit, unitparameter=up))
    up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
    de = common.UnitDomainElement(id='de', name='de', parameter=up2)
    DBSession.add(
        common.UnitValue(
            id='uv2', name='UnitValue2', unit=unit, unitparameter=up2,
            unitdomainelement=de))

    DBSession.add(common.Source(id='s'))

    # IGT sentence: analyzed/gloss are tab-aligned morpheme-by-morpheme.
    sentence = common.Sentence(
        id='sentence',
        name='sentence name',
        description='sentence description',
        analyzed='a\tmorpheme\tdoes\tdo',
        gloss='a\tmorpheme\t1SG\tdo.SG2',
        source='own',
        comment='comment',
        original_script='a morpheme',
        language=language,
        jsondata={'alt_translation': 'Spanish: ...'})
    common.SentenceReference(sentence=sentence, source=source)
    DBSession.add(common.Config(key='key', value='value'))
    # Register replacement/tombstone entries for redirect handling.
    common.Config.add_replacement('replaced', 'language', model=common.Language)
    common.Config.add_replacement('gone', None, model=common.Language)
    DBSession.flush()