def position():
    """Rebuild the ``Position`` table from two spreadsheets.

    Rows are keyed on ``(person_id, interval, title)`` and the patcher is
    run with ``remove=True``.

    * ``POSITION_PONTA2_CSV_KEY``: rows whose ``temporary`` column is not
      blank are ignored. A name that does not resolve to exactly one person
      is logged and the row is skipped.
    * ``POSITION_BIROU_CDEP_CSV_KEY``: every name must resolve to exactly
      one person (asserted); ``", Biroul Permanent"`` is appended to the
      title and no ``url`` is stored.

    The session is committed once both spreadsheets have been processed.
    """
    people_2012 = (
        models.Person.query
        .join(models.Mandate)
        .filter(models.Mandate.year == 2012)
        .all()
    )
    name_search = models.NameSearch(people_2012)

    patcher = TablePatcher(
        models.Position,
        models.db.session,
        key_columns=['person_id', 'interval', 'title'],
    )

    with patcher.process(remove=True) as add_position:
        for record in get_gdrive_csv(POSITION_PONTA2_CSV_KEY):
            if record['temporary'].strip():
                continue

            person_name = record['name'].strip()
            candidates = name_search.find(person_name)
            candidate_count = len(candidates)

            if candidate_count > 1:
                logger.warn("Multiple matches for %r", person_name)
                continue
            if candidate_count != 1:
                logger.warn("No matches for %r", person_name)
                continue

            (person,) = candidates
            interval = parse_interval(
                record['start_date'],
                record['end_date'],
            )
            add_position({
                'person_id': person.id,
                'interval': interval,
                'title': record['title'],
                # an empty url cell is stored as None
                'url': record['url'] or None,
            })

        for record in get_gdrive_csv(POSITION_BIROU_CDEP_CSV_KEY):
            person_name = record['name'].strip()
            candidates = name_search.find(person_name)
            assert len(candidates) == 1, \
                "Expected a single match for %r, got %r" % (person_name,
                                                            candidates)
            (person,) = candidates
            add_position({
                'person_id': person.id,
                'interval': parse_interval(
                    record['start_date'],
                    record['end_date'],
                ),
                'title': record['title'] + ", Biroul Permanent",
            })

    models.db.session.commit()
def get_committee_policy():
    """Set ``policy_domain_id`` on committees from the committee-policy CSV.

    Each CSV row carries a committee ``id`` and a ``policy`` slug. The slug
    is looked up in ``PolicyDomain``; an empty slug, or one that is not
    found (logged as a warning), results in ``policy_domain_id`` being
    ``None``. Rows are passed to the patcher with ``create=False``, and the
    session is committed at the end.
    """
    committee_patcher = TablePatcher(
        models.MpCommittee,
        models.db.session,
        key_columns=['id'],
    )

    with committee_patcher.process() as update_committee:
        for record in get_gdrive_csv(COMMITTEE_POLICY_CSV_KEY):
            policy_slug = record['policy']
            domain_id = None

            if policy_slug:
                domain = (
                    models.PolicyDomain.query
                    .filter_by(slug=policy_slug)
                    .first()
                )
                if domain is not None:
                    domain_id = domain.id
                else:
                    logger.warn("Unknown policy domain %r", policy_slug)

            update_committee(
                {'id': record['id'], 'policy_domain_id': domain_id},
                create=False,
            )

    models.db.session.commit()
def get_proposal_controversy():
    """Sync ``ProposalControversy`` rows with the proposal-controversy CSV.

    The proposal id is the text after the last ``/`` of the
    ``Link MP Tracker`` column; rows where that text is empty are skipped.
    Each remaining id is asserted to resolve to an existing ``Proposal``.
    Records are keyed on ``proposal_id``, the patcher runs with
    ``remove=True`` and the session is committed at the end.
    """
    patcher = TablePatcher(
        models.ProposalControversy,
        models.db.session,
        key_columns=['proposal_id'],
    )

    with patcher.process(remove=True) as add:
        for entry in get_gdrive_csv(PROPOSAL_CONTROVERSY_CSV_KEY):
            # Everything after the final '/' (the whole string when the
            # link contains no '/').
            proposal_id = entry['Link MP Tracker'].rpartition('/')[2]
            if not proposal_id:
                continue

            assert models.Proposal.query.get(proposal_id)

            add({
                'proposal_id': proposal_id,
                'title': entry['Titlu'],
                'reason': entry['Motive controversa'],
                'press_links': entry['Link presa'],
            })

    models.db.session.commit()
def get_vote_controversy(no_commit=False):
    """Sync ``VotingSessionControversy`` rows with the controversy CSV.

    Records are keyed on ``voting_session_id``, which is taken from the
    part of the ``mptracker_url`` column that follows ``/votes/``.

    Args:
        no_commit: when true, a warning is logged and the transaction is
            rolled back instead of committed.
    """
    # (model field, CSV column) pairs copied verbatim into each record.
    copied_columns = (
        ('title', 'title'),
        ('status', 'status'),
        ('reason', 'motive'),
        ('vote_meaning_yes', 'info_da'),
        ('vote_meaning_no', 'info_nu'),
        ('press_links', 'link_presa'),
    )

    patcher = TablePatcher(
        models.VotingSessionControversy,
        models.db.session,
        key_columns=['voting_session_id'],
    )

    with patcher.process(remove=True) as add_controversy:
        for entry in get_gdrive_csv(CONTROVERSY_CSV_KEY):
            record = {field: entry[column]
                      for field, column in copied_columns}
            url_parts = entry['mptracker_url'].split('/votes/')
            record['voting_session_id'] = url_parts[1]
            add_controversy(record)

    if not no_commit:
        models.db.session.commit()
        return

    logger.warn("Rolling back the transaction")
    models.db.session.rollback()
def stop_words():
    """Load the stop-words CSV into ``Stopword``, keyed on ``id``.

    CSV rows are handed to the patcher unchanged; the patcher runs with
    ``remove=True`` and the session is committed at the end.

    NOTE(review): another ``stop_words`` is defined further down in this
    file. If both live in the same module, the later definition rebinds
    the name - confirm which one is intended.
    """
    stopword_patcher = TablePatcher(
        models.Stopword,
        models.db.session,
        key_columns=['id'],
    )

    with stopword_patcher.process(remove=True) as add_stop_word:
        for entry in get_gdrive_csv(STOP_WORDS_CSV_KEY):
            add_stop_word(entry)

    models.db.session.commit()
def policy_domain():
    """Load the policy-domain CSV into ``PolicyDomain``, keyed on ``slug``.

    CSV rows are handed to the patcher unchanged; the patcher runs with
    ``remove=True`` and the session is committed at the end.
    """
    domain_patcher = TablePatcher(
        models.PolicyDomain,
        models.db.session,
        key_columns=['slug'],
    )

    with domain_patcher.process(remove=True) as add_policy_domain:
        for entry in get_gdrive_csv(POLICY_DOMAIN_CSV_KEY):
            add_policy_domain(entry)

    models.db.session.commit()
def stop_words():
    """Load the stop-words CSV into ``Stopword``, keyed on ``id``.

    Only the ``id`` column of each CSV row is used, after being passed
    through ``normalize_to_ascii``. The patcher runs with ``remove=True``
    and the session is committed at the end.
    """
    from mptracker.nlp import normalize_to_ascii

    stopword_patcher = TablePatcher(
        models.Stopword,
        models.db.session,
        key_columns=['id'],
    )

    with stopword_patcher.process(remove=True) as add_stop_word:
        for entry in get_gdrive_csv(STOP_WORDS_CSV_KEY):
            ascii_word = normalize_to_ascii(entry['id'])
            add_stop_word({'id': ascii_word})

    models.db.session.commit()
def get_member_count():
    """Load the member-count CSV into ``MemberCount``.

    In every CSV row the column with an empty header holds the short name;
    each remaining column is read as a ``year -> count`` pair and both
    values are converted with ``int``. Records are keyed on
    ``(short_name, year)``, the patcher runs with ``remove=True`` and the
    session is committed at the end.
    """
    count_patcher = TablePatcher(
        models.MemberCount,
        models.db.session,
        key_columns=['short_name', 'year'],
    )

    with count_patcher.process(remove=True) as add_member_count:
        for record in get_gdrive_csv(MEMBER_COUNT_CSV_KEY):
            # Removing the name cell leaves only the per-year columns.
            name_cell = record.pop('')
            for year_text, count_text in record.items():
                add_member_count(dict(
                    short_name=name_cell,
                    year=int(year_text),
                    count=int(count_text),
                ))

    models.db.session.commit()
def cabinet_party():
    """Load the cabinet-party CSV into ``CabinetMembership``.

    Every row must have ``legislature == '2012'`` (asserted). Its ``code``
    is looked up among the ``MpGroup`` short names, and a lookup miss
    raises ``KeyError``. Records are keyed on ``(mp_group_id, interval)``,
    the patcher runs with ``remove=True`` and the session is committed at
    the end.
    """
    groups_by_short_name = dict(
        (group.short_name, group) for group in models.MpGroup.query
    )
    membership_patcher = TablePatcher(
        models.CabinetMembership,
        models.db.session,
        key_columns=['mp_group_id', 'interval'],
    )

    with membership_patcher.process(remove=True) as add_membership:
        for entry in get_gdrive_csv(CABINET_PARTY_CSV_KEY):
            assert entry['legislature'] == '2012'
            mp_group = groups_by_short_name[entry['code']]
            interval = parse_interval(
                entry['start_date'],
                entry['end_date'],
            )
            add_membership({
                'mp_group_id': mp_group.id,
                'interval': interval,
            })

    models.db.session.commit()
def controversy():
    """Synchronise ``Controversy`` rows and their voting sessions from CSV.

    Steps:

    1. Remember the voting sessions that currently have a
       ``controversy_id``.
    2. Read ``CONTROVERSY_CSV_KEY``, group the rows by ``slug`` and resolve
       each row's voting session through the ``idv`` argument of its
       ``link`` column.
    3. Patch ``Controversy`` (keyed on ``slug``, ``remove=True``) and flush
       the session.
    4. Point every listed voting session at its controversy, then reset
       ``controversy_id`` to ``None`` on sessions that were linked before
       but are no longer listed.
    5. Commit.

    Raises:
        ValueError: if a CSV row references a voting session that cannot be
            found. This is raised while reading the CSV, before any table
            is patched.
    """
    old_voting_sessions = set(
        models.VotingSession.query
        # SQLAlchemy column comparison (renders IS NOT NULL); must stay
        # `!= None` rather than `is not None`.
        .filter(models.VotingSession.controversy_id != None)  # noqa: E711
        .all()
    )

    controversy_map = {}
    for line in get_gdrive_csv(CONTROVERSY_CSV_KEY):
        cdeppk = url_args(line['link']).get('idv', type=int)
        slug = line['slug']
        if slug not in controversy_map:
            controversy_map[slug] = {
                'data': {
                    'slug': slug,
                    'title': line['title'],
                },
                'voting_session_rows': [],
            }

        voting_session = (
            models.VotingSession.query
            .filter_by(cdeppk=cdeppk)
            .first()
        )
        if voting_session is None:
            # Fail here with a clear message instead of crashing later on
            # `None.id` after the controversy table was already patched.
            raise ValueError(
                "No voting session with cdeppk %r (link %r, slug %r)"
                % (cdeppk, line['link'], slug)
            )
        controversy_map[slug]['voting_session_rows'].append(voting_session)

    controversy_patcher = TablePatcher(
        models.Controversy,
        models.db.session,
        key_columns=['slug'],
    )
    with controversy_patcher.process(remove=True) as add_controversy:
        for entry in controversy_map.values():
            result = add_controversy(entry['data'])
            entry['row'] = result.row

    # The controversy rows' ids are read below; presumably the flush is
    # what makes them available for newly added rows.
    models.db.session.flush()

    voting_session_patcher = TablePatcher(
        models.VotingSession,
        models.db.session,
        key_columns=['id'],
    )
    new_voting_sessions = set()
    with voting_session_patcher.process() as add_voting_session:
        for entry in controversy_map.values():
            for voting_session in entry['voting_session_rows']:
                add_voting_session(
                    {
                        'id': voting_session.id,
                        'controversy_id': entry['row'].id,
                    },
                    create=False,
                )
                new_voting_sessions.add(voting_session)

        # Sessions that used to be linked but are no longer in the CSV.
        for voting_session in old_voting_sessions - new_voting_sessions:
            add_voting_session({
                'id': voting_session.id,
                'controversy_id': None,
            })

    models.db.session.commit()
def cabinet_position_row_iter():
    """Yield the rows of the PONTA2 position CSV, then those of PONTA3."""
    csv_keys = (POSITION_PONTA2_CSV_KEY, POSITION_PONTA3_CSV_KEY)
    for csv_key in csv_keys:
        yield from get_gdrive_csv(csv_key)
def get_committee_attendance(no_commit=False):
    """Store 2013 committee attendance ratios read from the roll-call CSV.

    For each CSV row the person is looked up by name among people with a
    2012 mandate, and for permanent committees 1 and 2 the ratio
    ``attended / meetings`` is written to ``attendance_2013`` on the
    person's membership of that committee. A warning is logged when an
    existing value is replaced by a noticeably different one.

    A committee slot is skipped when the committee name is blank or ``0``,
    names a sub-committee, or is unknown; when the counts are not integers;
    when the meeting count is zero; or when no membership is found for the
    person's mandate.

    Args:
        no_commit: when true, a warning is logged and the transaction is
            rolled back instead of committed.

    Raises:
        KeyError: if a row's ``Nume`` is not among the 2012 people.
    """
    # Spreadsheet column headers (used verbatim as dict keys below):
    #   Nume
    #   Comisia Permanenta 1
    #   Numar sedinte comisia permanenta 1
    #   Numar prezente deputat la sedintele comisiei 1 in 2013
    #   Comisia Permanenta 2
    #   Numar sedinte comisia permanenta 2
    #   Numar prezente deputat la sedintele comisiei 2 in 2013
    #   Comisia Speciala
    #   Numar sedinte comisia speciala
    #   Numar prezente deputat la sedintele comisiei speciale in 2013
    # The three special-committee columns are not read by this function.

    def collapse_spaces(text):
        # Committee names are compared with runs of whitespace collapsed.
        return re.sub(r'\s+', ' ', text)

    person_map = {
        p.name: p
        for p in (
            models.Person.query
            .join(models.Person.mandates)
            .filter_by(year=2012)
        )
    }

    committee_map = {
        collapse_spaces(c.name): c
        for c in (
            models.MpCommittee.query
            .filter(models.MpCommittee.chamber_id.in_([0, 2]))
        )
    }

    def parse_committee_data(row, number):
        """Return ``(committee, attendance_ratio)`` or ``None`` to skip."""
        name = collapse_spaces(row['Comisia Permanenta %d' % number].strip())

        if name in ['', '0']:
            return None

        if 'Subcomisia pentru' in name:
            logger.warn("Skipping committee %r", name)
            return None

        if name not in committee_map:
            logger.warn("Skipping membership %r %r", row['Nume'], name)
            return None

        committee = committee_map[name]
        meetings_2013_txt = row['Numar sedinte comisia permanenta %d' % number]
        attended_2013_txt = row['Numar prezente deputat la sedintele '
                                'comisiei %d in 2013' % number]

        try:
            meetings_2013 = int(meetings_2013_txt)
            attended_2013 = int(attended_2013_txt)
        except ValueError:
            # Non-numeric counts: nothing to record for this slot.
            return None

        if meetings_2013 == 0:
            # Avoid dividing by zero; there is no ratio to store.
            return None

        return (committee, attended_2013 / meetings_2013)

    for row in get_gdrive_csv(COMMITTEE_ROLL_CALL_CSV_KEY):
        person = person_map[row['Nume'].strip()]
        mandate = (
            person.mandates
            .filter_by(year=2012)
            .order_by('interval')
            .first()
        )

        for n in [1, 2]:
            _data = parse_committee_data(row, n)
            if _data is None:
                continue

            (committee, attendance_2013) = _data
            membership = (
                committee.memberships
                .filter_by(mandate=mandate)
                .order_by('interval')
                .first()
            )
            if membership is None:
                # No membership row to update; skip instead of failing on
                # `None.attendance_2013`.
                logger.warn(
                    "Skipping attendance, no membership found: %r %r",
                    person.name, committee.name,
                )
                continue

            if membership.attendance_2013 is not None:
                if not almost_eq(membership.attendance_2013,
                                 attendance_2013):
                    logger.warn(
                        "Updating attendance: %r %r -> %r",
                        person.name,
                        membership.attendance_2013,
                        attendance_2013,
                    )

            membership.attendance_2013 = attendance_2013

    if no_commit:
        logger.warn("Rolling back the transaction")
        models.db.session.rollback()
    else:
        models.db.session.commit()