def load_zitate(ws):
    """Create or update one ``Zitat`` per row of ``ws['speech']``.

    ``ws`` is indexed with the keys ``'mediathek'`` and ``'speech'``.
    The ``'mediathek'`` rows are keyed by their ``'speech_source_url'``
    and that lookup table is handed to ``load_debatte_zitate`` together
    with every stored ``Zitat``.

    For each speech row the ``Sitzung`` is looked up by ``wahlperiode`` /
    ``nummer`` (one query per distinct pair, the result is cached for the
    duration of the call, including a ``None`` result).  An existing
    ``Zitat`` with the same ``sitzung`` and ``sequenz`` is updated in
    place; otherwise a new one is created.  ``zitat.person`` is only
    assigned when the row carries a truthy ``'fingerprint'``.

    Progress is reported on stdout: a header once, then one dot for every
    1000th row.  The session is flushed after each row, committed once
    after all rows, and ``SPME_CACHE`` is cleared at the end.
    """
    mediathek = dict(
        (entry['speech_source_url'], entry) for entry in ws['mediathek']
    )
    sitzung_cache = {}

    # (Zitat attribute, speech key) pairs copied verbatim for every row.
    copied_fields = (
        ('sequenz', 'sequence'),
        ('text', 'text'),
        ('typ', 'type'),
        ('sprecher', 'speaker'),
        ('source_url', 'source_url'),
    )

    sys.stdout.write("Loading transcripts")
    sys.stdout.flush()

    for index, speech in enumerate(ws['speech']):
        if index % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()

        # Resolve the Sitzung once per (wahlperiode, sitzung) pair.
        cache_key = (speech['wahlperiode'], speech['sitzung'])
        if cache_key not in sitzung_cache:
            sitzung_query = Sitzung.query.filter_by(
                wahlperiode=speech.get('wahlperiode'),
                nummer=speech.get('sitzung'))
            sitzung_cache[cache_key] = sitzung_query.first()
        sitzung = sitzung_cache[cache_key]

        # Reuse the stored Zitat for this position if there is one.
        zitat = Zitat.query.filter_by(
            sitzung=sitzung,
            sequenz=speech['sequence']).first()
        if zitat is None:
            zitat = Zitat()

        zitat.sitzung = sitzung
        for attribute, key in copied_fields:
            setattr(zitat, attribute, speech[key])

        fingerprint = speech['fingerprint']
        if fingerprint:
            zitat.person = Person.query.filter_by(
                fingerprint=fingerprint).first()

        db.session.add(zitat)
        # Flush before handing the Zitat on to the debate loader.
        db.session.flush()
        load_debatte_zitate(ws, zitat, mediathek)

    db.session.commit()
    SPME_CACHE.clear()
def load_zitate(engine, debatte, zitate, speeches):
    """Create or update the ``Zitat`` rows belonging to ``debatte``.

    For every entry of ``zitate`` the transcript row in ``speeches`` whose
    ``wahlperiode`` / ``sitzung`` / ``sequence`` equal the entry's
    ``wp`` / ``session`` / ``sequence`` (compared as integers) is looked
    up.  If several rows match, the last one in ``speeches`` is used.  An
    existing ``Zitat`` with the same ``debatte`` and ``sequenz`` is
    updated in place; otherwise a new one is created.  The result is added
    to ``db.session``; nothing is flushed or committed here.

    Args:
        engine: Not used by this function; kept so existing callers work.
        debatte: Debate object; its ``sitzung`` attribute is copied onto
            every ``Zitat``.
        zitate: Iterable of mappings with the keys ``wp``, ``session``,
            ``sequence``, ``speech_id`` and ``speaker``.
        speeches: Collection of mappings with the keys ``wahlperiode``,
            ``sitzung``, ``sequence``, ``text``, ``type``, ``speaker``,
            ``source_url`` and ``fingerprint``.  It is scanned once per
            entry of ``zitate``, so it must be re-iterable.

    Raises:
        IndexError: If no row of ``speeches`` matches an entry.
    """
    for data in zitate:
        # Convert the lookup key once instead of once per speech row.
        wp = int(data['wp'])
        session = int(data['session'])
        sequence = int(data['sequence'])

        # A list comprehension (rather than ``filter(...).pop()``) behaves
        # the same on Python 2 and 3, where ``filter`` returns an iterator.
        matches = [
            s for s in speeches
            if int(s['wahlperiode']) == wp
            and int(s['sitzung']) == session
            and int(s['sequence']) == sequence
        ]
        if not matches:
            raise IndexError(
                "no speech found for wp=%r session=%r sequence=%r"
                % (wp, session, sequence))
        # Keep the historical ``.pop()`` semantics: the last match wins.
        speech = matches[-1]

        zitat = Zitat.query.filter_by(
            debatte=debatte,
            sequenz=speech['sequence']).first()
        if zitat is None:
            zitat = Zitat()

        zitat.sitzung = debatte.sitzung
        zitat.debatte = debatte
        zitat.sequenz = speech['sequence']
        zitat.text = speech['text']
        zitat.typ = speech['type']
        zitat.speech_id = data['speech_id']
        zitat.sprecher = speech['speaker']
        zitat.redner = data['speaker']
        zitat.source_url = speech['source_url']

        fingerprint = speech['fingerprint']
        if fingerprint:
            # SPEAKERS memoizes the Person lookup per fingerprint; a
            # ``None`` result is cached as well, so each fingerprint is
            # queried at most once.
            if fingerprint not in SPEAKERS:
                SPEAKERS[fingerprint] = Person.query.filter_by(
                    fingerprint=fingerprint).first()
            zitat.person = SPEAKERS[fingerprint]

        db.session.add(zitat)
def load_zitate(engine, indexer, debatte, zitate, speeches, reden):
    """Create or update the ``Zitat`` rows for the speeches of ``debatte``.

    For every entry of ``zitate`` the transcript row in ``speeches`` whose
    ``wahlperiode`` / ``sitzung`` / ``sequence`` equal the entry's
    ``wp`` / ``session`` / ``sequence`` (compared as integers) is looked
    up.  If several rows match, the last one in ``speeches`` is used.  An
    existing ``Zitat`` with the same ``sitzung`` (``debatte.sitzung``) and
    ``sequenz`` is updated in place; otherwise a new one is created.

    Args:
        engine: Passed through to ``lazyload_person``.
        indexer: Passed through to ``lazyload_person``.
        debatte: Debate object; it and its ``sitzung`` attribute are
            assigned to every ``Zitat``.
        zitate: Iterable of mappings with the keys ``wp``, ``session``,
            ``sequence`` and ``speech_id``.
        speeches: Collection of mappings with the keys ``wahlperiode``,
            ``sitzung``, ``sequence``, ``text``, ``type``, ``speaker``,
            ``source_url`` and ``fingerprint``.  It is scanned once per
            entry of ``zitate``, so it must be re-iterable.
        reden: Mapping indexed by ``speech_id``; the value is assigned to
            ``zitat.rede``.

    Raises:
        IndexError: If no row of ``speeches`` matches an entry.
        KeyError: If an entry's ``speech_id`` is missing from ``reden``
            (assuming ``reden`` is a plain dict).
    """
    for data in zitate:
        # Convert the lookup key once instead of once per speech row.
        wp = int(data['wp'])
        session = int(data['session'])
        sequence = int(data['sequence'])

        # A list comprehension (rather than ``filter(...).pop()``) behaves
        # the same on Python 2 and 3, where ``filter`` returns an iterator.
        matches = [
            s for s in speeches
            if int(s['wahlperiode']) == wp
            and int(s['sitzung']) == session
            and int(s['sequence']) == sequence
        ]
        if not matches:
            raise IndexError(
                "no speech found for wp=%r session=%r sequence=%r"
                % (wp, session, sequence))
        # Keep the historical ``.pop()`` semantics: the last match wins.
        speech = matches[-1]

        zitat = Zitat.query.filter_by(
            sitzung=debatte.sitzung,
            sequenz=speech['sequence']).first()
        if zitat is None:
            zitat = Zitat()

        zitat.sitzung = debatte.sitzung
        zitat.debatte = debatte
        zitat.sequenz = speech['sequence']
        zitat.rede = reden[data['speech_id']]
        zitat.text = speech['text']
        zitat.typ = speech['type']
        zitat.sprecher = speech['speaker']
        zitat.source_url = speech['source_url']
        # The fingerprint is forwarded unconditionally; handling of empty
        # values is left to lazyload_person.
        zitat.person = lazyload_person(engine, indexer, speech['fingerprint'])

        db.session.add(zitat)
        # NOTE(review): flushing per item so each Zitat is visible to the
        # next lookup; confirm this is intended rather than a single flush
        # after the loop.
        db.session.flush()