def parse(input_file):
    """Import election budgets from a comma-separated file.

    Each row is read as: column 0 = last name, column 1 = first name,
    column 4 = budget (commas are stripped from the value).  The name is
    normalised with ``parse_tools.fix_mp_name`` and looked up as a ``Member``;
    rows whose member, or whose ``TermMember`` for the term named by the
    module-level ``TERM``, does not exist are skipped.  For the remaining
    rows the budget is stored on both the ``TermMember`` and the
    ``MemberStats`` record covering the term.

    :param input_file: path of the CSV file to read.
    :raises MemberStats.DoesNotExist: if a matched member has no stats row
        for the term (this lookup is not guarded).
    """
    term = Term.objects.get(name=TERM)
    # The context manager guarantees the file is closed even when a row is
    # malformed or a database lookup raises half-way through the import.
    with open(input_file, 'r') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        for row in reader:
            first_name = row[1].strip()
            last_name = row[0].strip()
            budget = row[4].strip().replace(',', '')
            name = parse_tools.fix_mp_name("%s %s" % (last_name, first_name))
            print("%-20s %-20s %10s" % (first_name, last_name, budget))
            try:
                member = Member.objects.get(name=name)
                tm = TermMember.objects.get(member=member, term=term)
            except (Member.DoesNotExist, TermMember.DoesNotExist):
                # Not a known member, or not a member during this term.
                continue
            ms = MemberStats.objects.get(begin=term.begin, end=term.end,
                                         member=member)
            tm.election_budget = budget
            tm.save()
            ms.election_budget = budget
            ms.save()
def handle_row(src, q_info, row):
    """Store one member's numeric questionnaire answers from a data row.

    ``row[0]`` holds the member name.  Every entry of ``q_info`` is a
    4-tuple ``(q_idx, i_idx, order, txt)``: ``q_idx`` is the column of
    ``row`` carrying the numeric answer and ``order`` selects the
    ``Question`` of ``src``; the other two fields are not used here.  The
    numeric answer is scaled into an option index with the module-level
    ``OPTION_STEPS`` and ``ANSWER_MAX`` and saved as the member's ``Answer``
    (created if missing) with an empty explanation.

    Prints a notice and returns when the member is unknown; prints a notice
    and skips the question when its column is empty.
    """
    mp_name = parse_tools.fix_mp_name(row[0])
    matches = Member.objects.filter(name=mp_name)
    if not matches:
        print("%s not found" % mp_name)
        return
    member = matches[0]
    print("%s" % member)

    for column, _i_idx, order, _txt in q_info:
        if not row[column]:
            print("\tMissing value for column %d" % column)
            continue
        value = int(row[column])
        step = int(value * OPTION_STEPS // ANSWER_MAX)
        # A value equal to ANSWER_MAX lands one past the last option.
        if step == OPTION_STEPS:
            step = OPTION_STEPS - 1

        question = Question.objects.get(source=src, order=order)
        option = Option.objects.get(question=question, order=step)
        try:
            answer = Answer.objects.get(member=member, question=question)
        except Answer.DoesNotExist:
            answer = Answer(member=member, question=question)
        answer.option = option
        answer.explanation = None
        answer.save()
def parse(input_file):
    """Import election funding totals from a semicolon-separated file.

    The file is read with ``;`` as delimiter and ``'`` as quote character;
    its first row is skipped as a header.  For every data row, column 0
    holds the first names (only the first one is used), column 1 the last
    name and column 6 the funding amount written with a decimal comma.
    Rows are matched by normalised name against the members active in the
    module-level ``term``; unmatched rows are ignored.  The amount is stored
    in ``election_budget`` of the member's ``MemberStats`` for the term.
    Afterwards every active member that had no row in the file is reported.

    :param input_file: path of the CSV file to read.
    """
    mp_list = Member.objects.active_in_term(term)
    mp_dict = {}
    for mp in mp_list:
        mp_dict[mp.name] = mp
        mp.found = False

    # The context manager guarantees the file is closed even when a row is
    # malformed or a database lookup raises half-way through the import.
    with open(input_file, 'r') as f:
        reader = csv.reader(f, delimiter=';', quotechar="'")
        next(reader)  # skip header
        for row in reader:
            first_names = row[0].strip(" '").decode('utf8').split(' ')
            last_name = row[1].strip(" '").decode('utf8')
            # NOTE(review): presumably maps a differently-encoded accented
            # "e" to the precomposed U+00E9 - confirm against the input data.
            last_name = unicode(last_name).replace(u'é', u'\u00e9')
            name = '%s %s' % (last_name, first_names[0])
            name = parse_tools.fix_mp_name(name)
            try:
                member = mp_dict[name]
            except KeyError:
                # Row does not belong to a member active in this term.
                continue
            member.found = True
            funding = float(row[6].replace(',', '.'))
            print("%30s: %.2f" % (unicode(member), funding))
            ms = MemberStats.objects.get(begin=term.begin, end=term.end,
                                         member=member)
            ms.election_budget = funding
            ms.save()

    for mp in mp_list:
        if mp.found:
            continue
        print("Funding data for MP '%s' not found" % unicode(mp))
def process_signatures(sign_el):
    """Extract the signing date and signer names from a signature element.

    ``sign_el`` must have exactly one ``paivays`` child, whose ``pvm``
    attribute becomes the date.  Each ``edustaja/henkilo`` element
    contributes one name built as "<sukunimi> <etunimi>" and normalised with
    ``parse_tools.fix_mp_name``; elements whose ``numero`` attribute is
    greater than 10000 are left out.

    Returns a dict with the keys ``'date'`` and ``'mps'`` (list of names in
    document order).
    """
    date_els = sign_el.xpath("./paivays")
    assert len(date_els) == 1
    signed_on = date_els[0].attrib['pvm']

    signers = []
    for person in sign_el.xpath("./edustaja/henkilo"):
        if int(person.attrib['numero']) > 10000:
            continue
        first = person.xpath("./etunimi")[0].text.strip()
        last = person.xpath("./sukunimi")[0].text.strip()
        signers.append(parse_tools.fix_mp_name("%s %s" % (last, first)))

    return {'date': signed_on, 'mps': signers}
def handle_row(src, row, writer=None):
    """Store one candidate's questionnaire answers from a data row.

    The first 16 columns are fixed fields (district, party, names, ...).
    From column 16 on the row is sliced with a stride of three into
    answers, importances and comments.  When ``writer`` is given, a summary
    row (name, district without the " vaalipiiri" suffix, party, age,
    gender, education) is written to it, UTF-8 encoded.

    If no ``Member`` matches the normalised name the function returns, first
    printing the name when the ``is_mp`` column is not ``'0'``.  Otherwise,
    for every question index not in the module-level ``SKIP_QUESTIONS`` that
    has an answer or a comment, the member's ``Answer`` is created or
    updated with the matching ``Option`` (``None`` when only a comment was
    given) and the comment as explanation.
    """
    # The unpacking doubles as a check that the row is wide enough.
    district, party, last_name, first_name, age, gender, is_indep = row[0:7]
    county, edu, intro, is_county_off, is_mp, is_mep, profession = row[7:14]
    home_page, rss_feed = row[14:16]
    answers = row[16::3]
    importance_list = row[17::3]
    comments = row[18::3]

    name = parse_tools.fix_mp_name(' '.join((last_name, first_name)))

    if writer:
        suffix = ' vaalipiiri'
        if district.endswith(suffix):
            district = district.replace(suffix, '')
        summary = [name, district, party, age, gender, edu]
        writer.writerow([s.encode('utf8') if s else '' for s in summary])

    matches = Member.objects.filter(name=name)
    if not matches:
        if is_mp != '0':
            print(name)
        return
    member = matches[0]

    for idx, choice in enumerate(answers):
        if idx in SKIP_QUESTIONS:
            continue
        comment = comments[idx]
        if not (choice or comment):
            continue
        question = Question.objects.get(source=src, order=idx)
        option = (Option.objects.get(question=question, name=choice)
                  if choice else None)
        try:
            answer = Answer.objects.get(member=member, question=question)
        except Answer.DoesNotExist:
            answer = Answer(member=member, question=question)
        answer.option = option
        answer.explanation = comment
        answer.save()
def process_district(district):
    """Scrape the funding listing of one district and process each member.

    The page at ``URL_BASE % district`` is fetched through ``http_cache``
    and parsed with lxml.  Every non-header row of each ``listing_table``
    div is expected to start with a cell of the form
    "<first names>  <last name>" (two or more spaces in between).  Rows that
    do not match are reported and skipped; rows whose name does not match a
    ``Member`` are skipped silently.  For matched members the row's report
    link is chosen and handed to ``process_mp``.

    :param district: value substituted into ``URL_BASE``.
    :raises AssertionError: if a row contains a link with an unexpected
        label, or no usable link at all.
    """
    url = URL_BASE % district
    page = http_cache.open_url(url, 'funding')
    doc = html.fromstring(page)
    doc.make_links_absolute(url)

    # Raw string so that "\w" reaches the regex engine untouched; compiled
    # once because the pattern is the same for every row.
    name_re = re.compile(r'([\w -]+)[ ]{2,}([\w -]+)', re.U)

    for table in doc.xpath(".//div[@class='listing_table']"):
        for row in table.xpath(".//tr"):
            cell = row.getchildren()[0]
            if cell.tag == 'th':
                continue
            m = name_re.match(cell.text)
            if not m:
                print("Skipping %s" % cell.text)
                continue
            fnames = m.group(1).strip()
            lname = m.group(2).strip()
            # Only the first of the first names is used for the lookup.
            name = "%s %s" % (lname, fnames.split(' ')[0])
            name = parse_tools.fix_mp_name(name)
            matches = Member.objects.filter(name=name)
            if not matches:
                continue
            mp = matches[0]

            # "Vaalirahoitusilmoitus" always wins; "Ennakkoilmoitus" is only
            # used while no link has been picked yet.
            link = None
            for anchor in row.xpath('.//a'):
                href = anchor.attrib['href']
                label = anchor.text.strip()
                if label == "Ennakkoilmoitus":
                    if not link:
                        link = href
                elif label == "Vaalirahoitusilmoitus":
                    link = href
                else:
                    # Raised explicitly so the check survives "python -O".
                    raise AssertionError("Unexpected link text: %r" % (label,))
            if not link:
                raise AssertionError("No report link found for %s" % (name,))
            process_mp(mp, link)
def parse_mp(src, lname, fname, href):
    """Scrape one member's opinion page and store the chosen answers.

    The member is looked up by the normalised "<lname> <fname>"; nothing
    happens if no ``Member`` matches.  When the name is present in the
    module-level ``mp_dict`` that object is used instead and flagged with
    ``found = True``.  The page at ``href`` is fetched through
    ``http_cache``; every ``em-compare-container`` div whose index is not in
    ``SKIP_QUESTIONS`` is treated as a question: its numbered ``h3`` heading
    gives the question text, the ``em-text`` cells give the option texts and
    the alternative row containing an ``acronym`` element marks the chosen
    option.  Questions without a chosen option are skipped.  An optional
    ``em-comment`` div supplies a quoted explanation.  The member's
    ``Answer`` for the question is created or updated accordingly.

    :param src: source passed on to ``add_question``.
    :param lname: last name of the member.
    :param fname: first name of the member.
    :param href: URL of the member's opinion page.
    """
    name = parse_tools.fix_mp_name("%s %s" % (lname, fname))
    matches = Member.objects.filter(name=name)
    if not matches:
        return
    mp = matches[0]
    if name in mp_dict:
        mp = mp_dict[name]
        mp.found = True
    print(mp)

    page = http_cache.open_url(href, 'opinions')
    doc = html.fromstring(page)
    q_list = doc.xpath(".//div[@class='em-compare-container']")
    for q_idx, q_el in enumerate(q_list):
        if q_idx in SKIP_QUESTIONS:
            continue

        headings = q_el.xpath("./h3")
        assert len(headings) == 1
        # Headings look like "12. Question text"; keep only the text part.
        m = re.match(r'\d+\.\s+(.+)', headings[0].text.strip(), re.U)
        assert m
        q_text = m.group(1)

        a_text_list = [td.text.strip()
                       for td in q_el.xpath(".//td[@class='em-text']")]
        q_obj = add_question(src, q_text, q_idx, a_text_list)

        alt_rows = q_el.xpath(".//table[@class='em-compare-alts ']/tr")
        assert len(alt_rows) == len(a_text_list)
        chosen = None
        for a_idx, alt in enumerate(alt_rows):
            if alt.xpath(".//acronym"):
                # Compare against None: index 0 is a valid choice and must
                # still count as "already chosen" for this sanity check.
                assert chosen is None
                chosen = a_idx
        if chosen is None:
            continue

        comm_els = q_el.xpath(".//div[@class='em-comment']")
        if comm_els:
            assert len(comm_els) == 1
            # The comment is assembled from the text following each <br>.
            text_list = []
            for br in comm_els[0].xpath(".//br"):
                if not br.tail:
                    continue
                piece = br.tail.strip()
                if piece:
                    text_list.append(piece)
            comm_text = '\n'.join(text_list)
            # The comment is expected to be wrapped in double quotes.
            assert comm_text[0] == '"' and comm_text[-1] == '"'
            comm_text = comm_text[1:-1]
        else:
            comm_text = None

        opt = q_obj.opt_dict[chosen]
        try:
            ans = Answer.objects.get(member=mp, question=q_obj)
        except Answer.DoesNotExist:
            # Only a missing answer means "create one"; other errors
            # (database failures, duplicates) must not be hidden.
            ans = Answer(member=mp, question=q_obj)
        ans.option = opt
        ans.explanation = comm_text
        ans.save()