def load(): legs= leg.load() for x in sorted(legs['wp'].keys()): idsobj= legs['wp'][x]['id'] name = legs['wp'][x]['name']['official_full'] congid = idsobj['govtrack'] wiki= idsobj['opencongwiki'] if (not wiki == "Error" ): continue xml = cache.cacheweb('http://api.opencongress.org/people?person_id=%d' % congid) xml =xml.replace("<?xml version=\"1.0\" encoding=\"UTF-8\"?>","") xmlio = StringIO(xml) tree = etree.parse(xmlio) name = unicode(tree.xpath("//unaccented-name/text()")[0]) name = name.replace(" ","_") url = "http://www.opencongress.org/w/index.php?title=%s&printable=yes" % name #url = "http://www.opencongress.org/wiki/%s" % name try : data = cache.cacheweb( url) except Exception, e: print "failed", name ,e except KeyboardInterrupt: print "bye" exit()
def load(): legs = leg.load() out = open("report.wiki", 'w') for x in sorted(legs['wp'].keys()): idsobj = legs['wp'][x]['id'] gt = idsobj['govtrack'] oname = idsobj['opencongwiki'] name = unicode(oname) name = name.encode('utf-8') name = urllib.quote_plus(name) url = "http://www.opencongress.org/w/index.php?title=%s&printable=yes" % name try: data = cache.cacheweb(url) except urllib2.HTTPError, e: if (e.code == 404): p = 'http://www.opencongress.org/people/show/%d' % gt # print u"* Missing [[" , unicode(oname) , u"]] from [", unicode(p), u" ", unicode(oname) , u"]" s = u"* Missing [[{}]] from [{} {}]\n".format( unicode(oname), p, unicode(oname)) print s out.write(s.encode('utf-8')) idsobj['opencongwiki'] = "Error" except Exception, e: # print "Missing [[",name,"]]" print "failed", name, e idsobj['opencongwiki'] = "Error2"
def process(filename, verbose):
    u"""Process one FEC filing, writing .wiki and .wikitable companions.

    Scans *filename* for SB23 (Schedule B) records and hands their fields
    to wikify_fields together with an index of current legislators keyed
    by FEC candidate id.
    """
    if verbose:
        print("reading", filename)
    # Build the fec-id -> legislator index up front.
    legs = leg.load()
    fecids = {}
    for key in sorted(legs['wp'].keys()):
        if 'fec' in legs['wp'][key]['id']:
            for fec_id in legs['wp'][key]['id']['fec']:
                fecids[fec_id] = legs['wp'][key]
    # Fix: the input handle was commented out while still being used below
    # (NameError at `fileobj.read()`); reopen it and close all three files
    # reliably.
    fileobj = open(filename, "rb")
    wiki = open(filename + '.wiki', "wb")
    table = open(filename + '.wikitable', "wb")
    try:
        for line in fileobj.read().split("\n"):
            if line[0:4] == "SB23":
                # print "---"
                # NOTE(review): the original split on "", which always
                # raises ValueError.  FEC v6+ electronic filings delimit
                # fields with ASCII FS (0x1c) -- TODO confirm against the
                # actual input files.
                fields = line.split("\x1c")
                wikify_fields(fecids, fields, table, wiki)
    finally:
        fileobj.close()
        wiki.close()
        table.close()
def load(): legs= leg.load() out = open("report.wiki", 'w') for x in sorted(legs['wp'].keys()): idsobj= legs['wp'][x]['id'] gt = idsobj['govtrack'] oname = idsobj['opencongwiki'] name = unicode(oname) name = name.encode('utf-8') name= urllib.quote_plus(name) url = "http://www.opencongress.org/w/index.php?title=%s&printable=yes" % name try : data = cache.cacheweb( url) except urllib2.HTTPError, e: if ( e.code == 404) : p='http://www.opencongress.org/people/show/%d' % gt # print u"* Missing [[" , unicode(oname) , u"]] from [", unicode(p), u" ", unicode(oname) , u"]" s=u"* Missing [[{}]] from [{} {}]\n".format(unicode(oname),p,unicode(oname)) print s out.write(s.encode('utf-8')) idsobj['opencongwiki']="Error" except Exception, e: # print "Missing [[",name,"]]" print "failed", name ,e idsobj['opencongwiki']="Error2"
def load():
    """Fetch and parse each current legislator's OpenCongress profile.

    For every legislator carrying a govtrack id, warm the API cache and
    parse the cached HTML profile page into the id record, then dump the
    updated legislator data.
    """
    legislators = leg.load()
    for person in sorted(legislators['wp'].keys()):
        ids = legislators['wp'][person]['id']
        # Skip entries without a govtrack id.
        if 'govtrack' not in ids:
            continue
        gt_id = ids['govtrack']
        cache.cacheweb('http://api.opencongress.org/people?person_id=%d' % gt_id)
        page = cache.cacheweb('http://www.opencongress.org/people/show/%d' % gt_id)
        parse(page, ids)
    dump.dump(legislators)
def load (filename): #filename= "maplight-convert/FEC2012c2.csv" fecs = {} legs= leg.load() for x in sorted(legs['wp'].keys()): obj= legs['wp'][x] idsobj= obj['id'] name = obj['name']['official_full'] if 'fec' in idsobj: for fec in idsobj['fec']: #print "fec",fec fecs[fec]=obj fieldnames=[ "TransactionTypeCode","TransactionType","ElectionCycle","ReportingCommitteeMLID","ReportingCommitteeFECID","ReportingCommitteeName","ReportingCommitteeNameNormalized","PrimaryGeneralIndicator","TransactionID","FileNumber","RecordNumberML","RecordNumberFEC","TransactionDate","TransactionAmount", "RecipientName","RecipientNameNormalized","RecipientCity","RecipientState","RecipientZipCode","RecipientCommitteeMLID","RecipientCommitteeFECID","RecipientCommitteeName","RecipientCommitteeNameNormalized","RecipientCommitteeTreasurer","RecipientCommitteeDesignationCode","RecipientCommitteeDesignation","RecipientCommitteeTypeCode","RecipientCommitteeType","RecipientCommitteeParty","RecipientCandidateMLID","RecipientCandidateFECID","RecipientCandidateName","RecipientCandidateNameNormalized","RecipientCandidateParty","RecipientCandidateICO","RecipientCandidateStatus","RecipientCandidateOfficeState","RecipientCandidateOffice","RecipientCandidateDistrict","RecipientCandidateGender", 
"DonorName","DonorNameNormalized","DonorCity","DonorState","DonorZipCode","DonorEmployer","DonorEmployerNormalized","DonorOccupation","DonorOccupationNormalized","DonorOrganization","DonorEntityTypeCode","DonorEntityType","DonorCommitteeMLID","DonorCommitteeFECID","DonorCommitteeName","DonorCommitteeNameNormalized","DonorCommitteeTreasurer","DonorCommitteeDesignationCode","DonorCommitteeDesignation","DonorCommitteeTypeCode","DonorCommitteeType","DonorCommitteeParty","DonorCandidateMLID","DonorCandidateFECID","DonorCandidateName","DonorCandidateNameNormalized","DonorCandidateParty","DonorCandidateICO","DonorCandidateStatus","DonorCandidateOfficeState","DonorCandidateOffice","DonorCandidateDistrict","DonorCandidateGender","UpdateTimestamp" ] fec_dict_reader = csv.DictReader(open(filename), delimiter=',', quotechar='"', restkey=100, fieldnames=fieldnames) from collections import defaultdict matrix = defaultdict(dict) #print fec_dict_reader.fieldnames f = open(filename + ".xml", 'w') for line in fec_dict_reader: for k in fieldnames : d = line[k] if (isinstance( d, int )): d= str(d) if d is not None and d != '': val = line['TransactionAmount'] if (val == 'TransactionAmount'): continue if (len(val)> 0): try: val = int(val) except: print "'%s'" % val, "failed" else: val = 0 if string.find(k,"CandidateFECID") > 0: if d not in fecs: fecs[d]= {} fecs[d]["fec_2012_2total"] =val else: if "fec_2012_2total" in fecs[d]: fecs[d]["fec_2012_2total"] = fecs[d]["fec_2012_2total"] + val else: fecs[d]["fec_2012_2total"] = val dump.dump(fecs)
def load():
    """Pull each current legislator's OpenCongress profile page through
    the cache, parse it into the id record, and dump the result."""
    data = leg.load()
    people = data['wp']
    for key in sorted(people.keys()):
        record = people[key]['id']
        if 'govtrack' in record:
            cid = record['govtrack']
            api_url = 'http://api.opencongress.org/people?person_id=%d' % cid
            page_url = 'http://www.opencongress.org/people/show/%d' % cid
            cache.cacheweb(api_url)  # warm the API cache first
            parse(cache.cacheweb(page_url), record)
    dump.dump(data)
def main(): try: opts, args = getopt.getopt(sys.argv[1:], "hv", []) except getopt.GetoptError as err: # print help information and exit: print str(err) # will print something like "option -a not recognized" usage() sys.exit(2) verbose = False for o, a in opts: if o == "-v": verbose = True elif o in ("-h", "--help"): usage() sys.exit() else: assert False, "unhandled option" legs = legislators_current.load() #print legs # print legs['wp'].keys() # print legs['wp'].items() names = sorted(legs['wp'].keys()) # print names for x in names: # print legs['wp'][x] try: d = wiki.parse_wiki_source(x, legs) A = legs['wp'][x] #check_imdb(x,A,d) #check_wikipedia(x,A,d) #compare_cong(x,A,d) #compare_votesmart(x,A,d) compare_washpo(x, A, d) except Exception, e: print "error1:", e
def main(): try: opts, args = getopt.getopt(sys.argv[1:], "hv", []) except getopt.GetoptError as err: # print help information and exit: print str(err) # will print something like "option -a not recognized" usage() sys.exit(2) verbose = False for o, a in opts: if o == "-v": verbose = True elif o in ("-h", "--help"): usage() sys.exit() else: assert False, "unhandled option" legs= legislators_current.load() #print legs # print legs['wp'].keys() # print legs['wp'].items() names = sorted(legs['wp'].keys()) # print names for x in names: # print legs['wp'][x] try : d =wiki.parse_wiki_source(x,legs) A = legs['wp'][x] #check_imdb(x,A,d) #check_wikipedia(x,A,d) #compare_cong(x,A,d) #compare_votesmart(x,A,d) compare_washpo(x,A,d) except Exception,e: print "error1:",e
import Ballotpedia_house as breps
import List_of_current_United_States_Senators as sens
import Ballotpedia_senator as bsens
import cache
import legislators_current as leg
from cStringIO import StringIO

# Cached parse results for each scraped source.
# NOTE(review): `reps` is referenced below but not imported in this chunk;
# presumably imported elsewhere in the file -- confirm.
rep = cache.cache('reps', reps.parse_rep)
brep = cache.cache('breps', breps.parse)
sen = cache.cache('sen', sens.parse)
bsen = cache.cache('bsen', bsens.parse)

# Merged House + Senate records, keyed like the individual sources.
congress = {'wp': rep['wp'].copy()}
congress['wp'].update(sen['wp'])

legs = leg.load()


def compare(a, b, name, link):
    # Walk every entry of `a` and look it up in `b` via the id field named
    # by `link`, with spaces normalized to underscores.
    # NOTE(review): the body may continue beyond this chunk; `name` is
    # unused in the visible portion.
    for x in sorted(a['wp'].keys()):
        aobj = a['wp'][x]
        # print aobj['id']
        if link not in aobj['id']:
            pass
        else:
            bid = aobj['id'][link]
            bid = bid.replace(" ", "_")
            if bid not in b['wp']:
                # Entry present in `a` but absent from `b`.
                # print "missing in B ", bid
                # print sorted(b['wp'].keys())
                continue
import csv
import string
import legislators_current as leg
import dump

# Index current legislators by each FEC id they are known under.
fecs = {}
legs = leg.load()
for x in sorted(legs['wp'].keys()):
    obj = legs['wp'][x]
    idsobj = obj['id']
    name = obj['name']['official_full']
    if 'fec' in idsobj:
        for fec in idsobj['fec']:
            # print "fec",fec
            fecs[fec] = obj

filename = "maplight-convert/FEC2012c1_1.csv"
# MapLight FEC CSV column names.  Because fieldnames is supplied
# explicitly, the file's own header row comes back as a data row.
fieldnames = [
    "TransactionTypeCode", "TransactionType", "ElectionCycle",
    "ReportingCommitteeMLID", "ReportingCommitteeFECID",
    "ReportingCommitteeName", "ReportingCommitteeNameNormalized",
    "PrimaryGeneralIndicator", "TransactionID", "FileNumber",
    "RecordNumberML", "RecordNumberFEC", "TransactionDate",
    "TransactionAmount", "RecipientName", "RecipientNameNormalized",
    "RecipientCity", "RecipientState", "RecipientZipCode",
    "RecipientEmployer", "RecipientEmployerNormalized",
    "RecipientOccupation", "RecipientOccupationNormalized",
    "RecipientOrganization", "RecipientEntityTypeCode",
    "RecipientEntityType", "RecipientCommitteeMLID",
    "RecipientCommitteeFECID", "RecipientCommitteeName",
    "RecipientCommitteeNameNormalized", "RecipientCommitteeTreasurer",
    "RecipientCommitteeDesignationCode", "RecipientCommitteeDesignation",
    "RecipientCommitteeTypeCode", "RecipientCommitteeType",
    "RecipientCommitteeParty", "RecipientCandidateMLID",
    "RecipientCandidateFECID", "RecipientCandidateName",
    "RecipientCandidateNameNormalized", "RecipientCandidateParty",
    "RecipientCandidateICO", "RecipientCandidateStatus",
    "RecipientCandidateOfficeState", "RecipientCandidateOffice",
    "RecipientCandidateDistrict", "RecipientCandidateGender",
    "DonorName", "DonorNameNormalized", "DonorCity", "DonorState",
    "DonorZipCode", "DonorEmployer", "DonorEmployerNormalized",
    "DonorOccupation", "DonorOccupationNormalized", "DonorOrganization",
    "DonorEntityTypeCode", "DonorEntityType", "DonorCommitteeMLID",
    "DonorCommitteeFECID", "DonorCommitteeName",
    "DonorCommitteeNameNormalized", "DonorCommitteeTreasurer",
    "DonorCommitteeDesignationCode", "DonorCommitteeDesignation",
    "DonorCommitteeTypeCode", "DonorCommitteeType", "DonorCommitteeParty",
    "DonorCandidateMLID", "DonorCandidateFECID", "DonorCandidateName",
    "DonorCandidateNameNormalized", "DonorCandidateParty",
    "DonorCandidateICO", "DonorCandidateStatus",
    "DonorCandidateOfficeState", "DonorCandidateOffice",
    "DonorCandidateDistrict", "DonorCandidateGender", "UpdateTimestamp",
]
fec_dict_reader = csv.DictReader(open(filename), delimiter=',',
                                 quotechar='"', restkey=100,
                                 fieldnames=fieldnames)
from collections import defaultdict
matrix = defaultdict(dict)
#print fec_dict_reader.fieldnames
f = open(filename + ".xml", 'w')
for line in fec_dict_reader:
    for k in fieldnames:
        d = line[k]
        if (isinstance(d, int)):
            d = str(d)
        if d is not None and d != '':
            if string.find(k, "FECID") > 0:
# NOTE(review): this chunk is truncated here -- the body of the `if`
# above lies outside the visible source, so the script is incomplete
# as shown.
def loadlegs():
    """Return the current-legislator data set (thin wrapper over leg.load())."""
    return leg.load()