def extract_data(endangerment):  # pragma: no cover
    """Precompute per-language "med" (best source) information.

    For every language selected by the initial query (active, with a latitude,
    and of level ``LanguoidLevel.language``) the refs linked to it are
    collected, wrapped in ``Source`` objects and sorted; the first item of the
    sorted list is taken as the overall med.  In addition, for every
    publication year occurring among the sources, the best source published
    in or before that year is determined.

    :param endangerment: mapping of language id to a dict; only its \
    ``'source'`` key is read here (missing ids/keys yield ``None``).
    :return: dict mapping language id to a three-item list \
    ``[med, potential_meds, endangerment_source]`` where ``med`` is the JSON \
    representation of the overall best source (or ``None`` if the language has \
    no sources), ``potential_meds`` is a list of JSON representations of the \
    distinct per-cut-off-year best sources ordered by year descending, and \
    ``endangerment_source`` is ``endangerment[id]['source']`` if present.
    """
    status = {}

    try:
        # `== True` / `!= None` are SQLAlchemy column expressions that build
        # SQL; they must not be rewritten with `is` / `is not`.
        lpks = DBSession.query(common.Language.pk) \
            .filter(common.Language.active == True) \
            .filter(common.Language.latitude != None) \
            .filter(Languoid.level == LanguoidLevel.language) \
            .order_by(common.Language.pk).all()
        print(len(lpks))

        # Number of languages linked to each source, restricted to refs
        # without computerized assignments.
        sql = """\
select ls.source_pk, count(ls.language_pk) from languagesource as ls, ref as r
where ls.source_pk = r.pk and r.ca_doctype_trigger is null and r.ca_language_trigger is null
group by source_pk
    """
        lcounts = {r[0]: r[1] for r in DBSession.execute(sql)}

        # loop over active, established languages with geo-coords
        # NOTE(review): `lpk` is a result row of a single-column query and is
        # passed as-is to the filters below -- confirm the driver adapts it
        # as intended.
        for i, lpk in enumerate(lpks):
            l = DBSession.query(common.Language).filter(common.Language.pk == lpk).one()

            # let's collect the relevant sources in a way that allows
            # computation of med.
            # Note: we limit refs to the ones without computerized assignments.
            sources = list(DBSession.query(Ref).join(common.LanguageSource) \
                .filter(common.LanguageSource.language_pk == lpk) \
                .filter(Ref.ca_doctype_trigger == None) \
                .filter(Ref.ca_language_trigger == None) \
                .options(joinedload(Ref.doctypes)))
            sources = sorted([Source(s, lcounts.get(s.pk, 0)) for s in sources])

            # keep the overall med
            # note: this source may not be included in the potential meds
            # computed below, e.g. because it may not have a year.
            med = sources[0].__json__() if sources else None

            # now we have to compute meds respecting a cut-off year.
            # to do so, we collect eligible sources per year and then
            # take the med of this collection.
            potential_meds = []

            # we only have to loop over publication years within all sources,
            # because only in these years something better might have come
            # along.
            for year in {s.year for s in sources if s.year}:
                # let's see if something better was published!
                eligible = [s for s in sources if s.year and s.year <= year]
                if eligible:
                    # Only the best (smallest) item is needed, so pick it
                    # directly instead of sorting the whole list.
                    potential_meds.append(min(eligible))

            # we store the precomputed sources information as jsondata:
            status[l.id] = [
                med,
                [s.__json__() for s in
                 sorted(set(potential_meds), key=lambda s: -s.year)],
                endangerment.get(l.id, {}).get('source')
            ]
            # progress indicator for long runs
            if i and i % 1000 == 0:
                print(i)
    finally:
        # Release the session even when a query or conversion fails.
        DBSession.close()

    return status
def data(settings):
    """Set up a database from *settings*, yield the engine, then close the session.

    An engine is built via ``sa.engine_from_config(settings)``, all tables
    registered on ``Base.metadata`` are created, and ``DBSession`` is bound to
    the engine.  Presumably used as a yield-style fixture or context manager;
    the decorator (if any) is not visible here.

    :param settings: configuration mapping handed to ``sa.engine_from_config``.
    :return: generator yielding the configured engine exactly once.
    """
    from clld.db.meta import Base, DBSession

    engine = sa.engine_from_config(settings)
    Base.metadata.create_all(bind=engine)
    DBSession.configure(bind=engine)
    try:
        yield engine
    finally:
        # Run the teardown even if an exception is thrown into the generator
        # at the yield point or the generator is closed early.
        DBSession.close()
def db(url='sqlite://'):
    """Set up a database at *url*, yield the engine, then close the session.

    An engine is created with ``sa.create_engine(url)``, all tables registered
    on ``Base.metadata`` are created, and ``DBSession`` is bound to the engine.
    Presumably used as a yield-style fixture or context manager; the decorator
    (if any) is not visible here.

    :param url: database URL passed to ``sa.create_engine``; defaults to \
    ``'sqlite://'``.
    :return: generator yielding the configured engine exactly once.
    """
    from clld.db.meta import Base, DBSession

    engine = sa.create_engine(url)
    Base.metadata.create_all(bind=engine)
    DBSession.configure(bind=engine)
    try:
        yield engine
    finally:
        # Run the teardown even if an exception is thrown into the generator
        # at the yield point or the generator is closed early.
        DBSession.close()
def data(settings):
    """Set up a database from *settings*, yield the engine, then close both sessions.

    An engine is built via ``sa.engine_from_config(settings)``, all tables
    registered on ``Base.metadata`` are created, and both ``DBSession`` and
    ``VersionedDBSession`` are bound to the engine.  Presumably used as a
    yield-style fixture or context manager; the decorator (if any) is not
    visible here.

    :param settings: configuration mapping handed to ``sa.engine_from_config``.
    :return: generator yielding the configured engine exactly once.
    """
    from clld.db.meta import Base, DBSession, VersionedDBSession

    engine = sa.engine_from_config(settings)
    Base.metadata.create_all(bind=engine)
    DBSession.configure(bind=engine)
    VersionedDBSession.configure(bind=engine)
    try:
        yield engine
    finally:
        # Close both sessions even if an exception is thrown into the
        # generator at the yield point, and make sure a failure while closing
        # the first session does not prevent closing the second one.
        try:
            DBSession.close()
        finally:
            VersionedDBSession.close()