def load():
    """Fetch the OpenCongress wiki page of every unresolved legislator.

    Only legislators whose ``opencongwiki`` id holds the marker ``"Error"``
    are processed.  For each of them the OpenCongress people API is queried
    with the GovTrack id, the first ``unaccented-name`` element of the
    response is turned into a page title (spaces become underscores) and the
    printable wiki page is requested through ``cache.cacheweb``.

    Nothing is returned and the legislator data is not modified; the fetched
    page itself is discarded.  A failed wiki fetch, or an API response
    without a name, is reported on stdout and the loop continues.
    Ctrl-C during the wiki fetch prints ``bye`` and exits.
    """
    legs = leg.load()
    people = legs['wp']
    for key in sorted(people.keys()):
        idsobj = people[key]['id']
        congid = idsobj['govtrack']
        if idsobj['opencongwiki'] != "Error":
            continue

        xml = cache.cacheweb('http://api.opencongress.org/people?person_id=%d' % congid)
        # The XML declaration is removed before parsing.
        # NOTE(review): presumably the parser rejects the encoding
        # declaration for this kind of input -- confirm.
        xml = xml.replace("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "")
        tree = etree.parse(StringIO(xml))

        found = tree.xpath("//unaccented-name/text()")
        if not found:
            # Previously an IndexError here aborted the whole run.
            print("failed %d no unaccented-name in API response" % congid)
            continue

        name = unicode(found[0]).replace(" ", "_")
        url = "http://www.opencongress.org/w/index.php?title=%s&printable=yes" % name
        try:
            cache.cacheweb(url)
        except KeyboardInterrupt:
            print("bye")
            # Same effect as the interactive helper exit(), without relying
            # on the site module having installed it.
            raise SystemExit
        except Exception as e:
            # Encode explicitly so a non-ASCII name cannot make the report
            # line itself fail when stdout is not a terminal.
            print("failed %s %s" % (name.encode('utf-8'), e))
# Example #2
0
def load():
    """Report legislators whose OpenCongress wiki page cannot be fetched.

    For every legislator the ``opencongwiki`` id is UTF-8 encoded,
    URL-quoted and used as the title of the printable wiki page, which is
    requested through ``cache.cacheweb``.

    * HTTP 404: a ``* Missing [[...]]`` line is printed and appended to
      ``report.wiki`` (UTF-8), and the in-memory ``opencongwiki`` id is set
      to ``"Error"``.
    * Any other HTTP error: reported on stdout, the id is left untouched.
    * Any other exception: reported on stdout, the id is set to
      ``"Error2"``.

    Nothing is returned.  The markers are only written to the loaded data
    structure; this function does not save it.
    """
    legs = leg.load()
    people = legs['wp']

    # The context manager guarantees the report is flushed and closed, which
    # the earlier bare open() never did.
    with open("report.wiki", 'w') as out:
        for key in sorted(people.keys()):
            idsobj = people[key]['id']
            gt = idsobj['govtrack']
            oname = idsobj['opencongwiki']
            title = urllib.quote_plus(unicode(oname).encode('utf-8'))

            url = "http://www.opencongress.org/w/index.php?title=%s&printable=yes" % title

            try:
                cache.cacheweb(url)
            except urllib2.HTTPError as e:
                if e.code == 404:
                    p = 'http://www.opencongress.org/people/show/%d' % gt
                    s = u"* Missing [[{}]] from [{} {}]\n".format(
                        unicode(oname), p, unicode(oname))
                    print(s)
                    out.write(s.encode('utf-8'))
                    idsobj['opencongwiki'] = "Error"
                else:
                    # Used to be dropped silently; at least make it visible.
                    print("failed %s %s" % (title, e))
            except Exception as e:
                print("failed %s %s" % (title, e))
                idsobj['opencongwiki'] = "Error2"
def process(filename, verbose):
    u"""
    Feed the ``SB23`` records of *filename* to ``wikify_fields``.

    Legislators returned by ``leg.load()`` are first indexed by each of
    their ``fec`` ids.  The input is then read line by line; every line
    whose first four characters are ``SB23`` is split into fields and passed
    to ``wikify_fields`` together with the index and the two output files
    ``<filename>.wikitable`` and ``<filename>.wiki``.

    filename -- path of the input file; also the prefix of both outputs
    verbose  -- when true, announce the file being read
    """
    if verbose:
        print("reading", filename)

    # Build the index before touching the file system so a failing
    # leg.load() no longer leaves empty output files behind.
    legs = leg.load()
    fecids = {}
    for key in sorted(legs['wp'].keys()):
        legislator = legs['wp'][key]
        if 'fec' in legislator['id']:
            for fec_id in legislator['id']['fec']:
                fecids[fec_id] = legislator

    # The separator literal in the previous code was the empty string, which
    # makes str.split raise ValueError on the first SB23 line.  FEC
    # electronic filings delimit fields with the ASCII 28 (file separator)
    # control character, which is easily lost when source is copied around.
    # NOTE(review): confirm against the actual input files.
    separator = "\x1c"

    # All three handles are closed even when wikify_fields raises.
    with open(filename, "rb") as fileobj, \
            open(filename + '.wiki', "wb") as wiki, \
            open(filename + '.wikitable', "wb") as table:
        for line in fileobj:
            if line[0:4] == "SB23":
                fields = line.rstrip("\n").split(separator)
                wikify_fields(fecids, fields, table, wiki)
def load():
    """Check each legislator's OpenCongress wiki page and log missing ones.

    The ``opencongwiki`` id of every legislator is UTF-8 encoded and
    URL-quoted to form the title of the printable wiki page, which is then
    requested through ``cache.cacheweb``.  Outcomes:

    * HTTP 404 -- a ``* Missing [[...]]`` wiki line is printed, written
      UTF-8 encoded to ``report.wiki``, and the in-memory ``opencongwiki``
      id becomes ``"Error"``;
    * other HTTP errors -- printed, id unchanged;
    * any other exception -- printed, id becomes ``"Error2"``.

    Returns nothing and does not persist the modified legislator data.
    """
    legs = leg.load()
    members = legs['wp']

    # Opened with a context manager so the report is always closed; the
    # handle used to be leaked.
    with open("report.wiki", 'w') as out:
        for member_key in sorted(members.keys()):
            idsobj = members[member_key]['id']
            gt = idsobj['govtrack']
            oname = idsobj['opencongwiki']
            quoted = urllib.quote_plus(unicode(oname).encode('utf-8'))

            url = "http://www.opencongress.org/w/index.php?title=%s&printable=yes" % quoted

            try:
                cache.cacheweb(url)
            except urllib2.HTTPError as e:
                if e.code != 404:
                    # Non-404 HTTP failures were previously ignored without
                    # a trace; report them but leave the id alone.
                    print("failed %s %s" % (quoted, e))
                    continue
                p = 'http://www.opencongress.org/people/show/%d' % gt
                s = u"* Missing [[{}]] from [{} {}]\n".format(unicode(oname), p, unicode(oname))
                print(s)
                out.write(s.encode('utf-8'))
                idsobj['opencongwiki'] = "Error"
            except Exception as e:
                print("failed %s %s" % (quoted, e))
                idsobj['opencongwiki'] = "Error2"
def load():
    """Run ``parse`` on the OpenCongress profile page of each legislator.

    Legislators without a ``govtrack`` id are skipped.  For the others the
    people API URL is requested through ``cache.cacheweb`` (the response is
    not used here), then the profile page is requested and handed to
    ``parse`` together with the legislator's id dictionary.  Finally the
    whole legislator structure is passed to ``dump.dump``.
    """
    legs = leg.load()
    for key in sorted(legs['wp'].keys()):
        ids = legs['wp'][key]['id']
        if 'govtrack' not in ids:
            continue

        person_id = ids['govtrack']
        api_url = 'http://api.opencongress.org/people?person_id=%d' % person_id
        profile_url = 'http://www.opencongress.org/people/show/%d' % person_id

        cache.cacheweb(api_url)
        profile_html = cache.cacheweb(profile_url)
        parse(profile_html, ids)

    dump.dump(legs)
# Example #6
0
def load(filename):
    """Sum transaction amounts per candidate FEC id and dump the totals.

    The result maps FEC ids to dictionaries.  It is seeded with the
    legislators from ``leg.load()`` (one entry per ``fec`` id, pointing at
    the legislator's own dictionary).  Then the CSV file *filename* is read
    with a fixed column list; for every row, the ``TransactionAmount`` is
    added to the ``fec_2012_2total`` key of each non-empty
    ``RecipientCandidateFECID`` / ``DonorCandidateFECID`` value, creating a
    new entry for ids that are not known yet.  The mapping is finally passed
    to ``dump.dump``.

    A row whose amount column repeats the column name (a header row) is
    skipped.  An empty amount counts as 0.  A row whose amount is not an
    integer is reported on stdout and skipped; previously the unparsed
    string was carried on and either raised on the next addition or was
    stored as the total.

    filename -- path of the CSV file; ``<filename>.xml`` is created/emptied
                as a side effect (see note in the body)
    """
    # index the known legislators by each of their FEC ids
    fecs = {}
    legs = leg.load()
    for key in sorted(legs['wp'].keys()):
        obj = legs['wp'][key]
        idsobj = obj['id']
        if 'fec' in idsobj:
            for fec in idsobj['fec']:
                fecs[fec] = obj

    fieldnames=[ "TransactionTypeCode","TransactionType","ElectionCycle","ReportingCommitteeMLID","ReportingCommitteeFECID","ReportingCommitteeName","ReportingCommitteeNameNormalized","PrimaryGeneralIndicator","TransactionID","FileNumber","RecordNumberML","RecordNumberFEC","TransactionDate","TransactionAmount",
                 "RecipientName","RecipientNameNormalized","RecipientCity","RecipientState","RecipientZipCode","RecipientCommitteeMLID","RecipientCommitteeFECID","RecipientCommitteeName","RecipientCommitteeNameNormalized","RecipientCommitteeTreasurer","RecipientCommitteeDesignationCode","RecipientCommitteeDesignation","RecipientCommitteeTypeCode","RecipientCommitteeType","RecipientCommitteeParty","RecipientCandidateMLID","RecipientCandidateFECID","RecipientCandidateName","RecipientCandidateNameNormalized","RecipientCandidateParty","RecipientCandidateICO","RecipientCandidateStatus","RecipientCandidateOfficeState","RecipientCandidateOffice","RecipientCandidateDistrict","RecipientCandidateGender",
                 "DonorName","DonorNameNormalized","DonorCity","DonorState","DonorZipCode","DonorEmployer","DonorEmployerNormalized","DonorOccupation","DonorOccupationNormalized","DonorOrganization","DonorEntityTypeCode","DonorEntityType","DonorCommitteeMLID","DonorCommitteeFECID","DonorCommitteeName","DonorCommitteeNameNormalized","DonorCommitteeTreasurer","DonorCommitteeDesignationCode","DonorCommitteeDesignation","DonorCommitteeTypeCode","DonorCommitteeType","DonorCommitteeParty","DonorCandidateMLID","DonorCandidateFECID","DonorCandidateName","DonorCandidateNameNormalized","DonorCandidateParty","DonorCandidateICO","DonorCandidateStatus","DonorCandidateOfficeState","DonorCandidateOffice","DonorCandidateDistrict","DonorCandidateGender","UpdateTimestamp" ]

    # Only these columns ever contribute to a total, so pick them once
    # instead of testing every column of every row.  "> 0" keeps the
    # original rule: the marker must occur after the first character.
    candidate_fields = [k for k in fieldnames if k.find("CandidateFECID") > 0]

    with open(filename) as csvfile:
        # NOTE(review): the original opened <filename>.xml for writing and
        # never used or closed it.  The create/truncate side effect is kept
        # in case a later step expects the file; the handle is now closed.
        open(filename + ".xml", 'w').close()

        fec_dict_reader = csv.DictReader(csvfile, delimiter=',', quotechar='"', restkey=100, fieldnames=fieldnames)
        for row in fec_dict_reader:
            raw_amount = row['TransactionAmount']
            if raw_amount == 'TransactionAmount':
                # header row
                continue

            if raw_amount:
                try:
                    amount = int(raw_amount)
                except ValueError:
                    print("'%s' failed" % raw_amount)
                    continue
            else:
                # empty, or missing because the row is short
                amount = 0

            for k in candidate_fields:
                fec_id = row[k]
                if fec_id is None or fec_id == '':
                    continue
                entry = fecs.setdefault(fec_id, {})
                entry["fec_2012_2total"] = entry.get("fec_2012_2total", 0) + amount

    dump.dump(fecs)
def load():
    """Parse every legislator's OpenCongress profile page, then dump.

    Each legislator that carries a ``govtrack`` id triggers two requests
    through ``cache.cacheweb``: the people API (response discarded here) and
    the profile page.  The profile HTML and the legislator's id dictionary
    are given to ``parse``.  Once all legislators are done the loaded
    structure is handed to ``dump.dump``.
    """
    api_template = 'http://api.opencongress.org/people?person_id=%d'
    page_template = 'http://www.opencongress.org/people/show/%d'

    legs = leg.load()
    for member in sorted(legs['wp'].keys()):
        idsobj = legs['wp'][member]['id']
        if 'govtrack' in idsobj:
            govtrack_id = idsobj['govtrack']
            cache.cacheweb(api_template % govtrack_id)
            parse(cache.cacheweb(page_template % govtrack_id), idsobj)

    dump.dump(legs)
def main():
    """Command-line entry point: compare every legislator against WashPo.

    Options:
      -v            accepted; sets a local flag that nothing below reads yet
      -h, --help    print usage and exit

    An unknown option prints the getopt error and usage, then exits with
    status 2.  Afterwards each legislator key is run through
    ``wiki.parse_wiki_source`` and ``compare_washpo``; an exception for one
    legislator is printed as ``error1: ...`` and the loop continues.
    """
    try:
        # "help" must be declared as a long option, otherwise getopt rejects
        # --help before the loop below ever gets to see it.
        opts, args = getopt.getopt(sys.argv[1:], "hv", ["help"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
    verbose = False

    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            assert False, "unhandled option"

    legs = legislators_current.load()
    for x in sorted(legs['wp'].keys()):
        # Best effort: one bad entry must not stop the remaining ones.
        try:
            d = wiki.parse_wiki_source(x, legs)
            A = legs['wp'][x]
            #check_imdb(x,A,d)
            #check_wikipedia(x,A,d)
            #compare_cong(x,A,d)
            #compare_votesmart(x,A,d)
            compare_washpo(x, A, d)
        except Exception as e:
            print("error1: %s" % (e,))
def main():
    """Parse the command line, then run ``compare_washpo`` per legislator.

    ``-h`` / ``--help`` print the usage text and exit; ``-v`` is accepted
    and recorded in a local flag that is not consulted further down.  Any
    other option makes getopt fail: the error and the usage text are printed
    and the process exits with status 2.

    The legislators come from ``legislators_current.load()``.  For each key,
    in sorted order, ``wiki.parse_wiki_source`` is called and its result is
    passed to ``compare_washpo``.  Failures are printed with the prefix
    ``error1:`` and do not interrupt the loop.
    """
    try:
        # The long-option list used to be empty, so --help was refused by
        # getopt even though it is handled below.
        opts, args = getopt.getopt(sys.argv[1:], "hv", ["help"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    verbose = False
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o == "-v":
            verbose = True
        else:
            assert False, "unhandled option"

    legs = legislators_current.load()
    names = sorted(legs['wp'].keys())
    for x in names:
        try:
            d = wiki.parse_wiki_source(x, legs)
            A = legs['wp'][x]
            #check_imdb(x,A,d)
            #check_wikipedia(x,A,d)
            #compare_cong(x,A,d)
            #compare_votesmart(x,A,d)
            compare_washpo(x, A, d)
        except Exception as e:
            # keep going: report the problem and move to the next name
            print("error1: %s" % (e,))
# Example #10
0
import Ballotpedia_house as breps
import List_of_current_United_States_Senators as sens
import Ballotpedia_senator as bsens
import cache
import legislators_current as leg

from cStringIO import StringIO

# Module-level data, built at import time.
# Each cache.cache call receives a cache name and a parser callable.
# NOTE(review): `reps` is not imported in the visible import lines --
# confirm it is imported elsewhere, otherwise the next line raises NameError.
rep = cache.cache('reps', reps.parse_rep)
brep = cache.cache('breps', breps.parse)
sen = cache.cache('sen', sens.parse)
bsen = cache.cache('bsen', bsens.parse)

# `congress` merges the 'wp' entries of `rep` and `sen`; the copy keeps
# rep['wp'] itself unmodified, and entries from `sen` win on equal keys.
congress = {'wp': rep['wp'].copy()}
congress['wp'].update(sen['wp'])
# Current legislators as returned by the legislators_current module.
legs = leg.load()


def compare(a, b, name, link):
    """Walk the ``'wp'`` entries of *a* and look each one up in *b*.

    Entries of *a* are visited in sorted key order.  When an entry's ``id``
    dictionary contains *link*, that value (spaces replaced by underscores)
    is looked up in ``b['wp']``.  The visible code does nothing further with
    either outcome of the lookup, and *name* is not used.  Returns ``None``.
    """
    for key in sorted(a['wp']):
        ids = a['wp'][key]['id']
        if link in ids:
            target = ids[link].replace(" ", "_")
            if target not in b['wp']:
                # no counterpart in b -- move on to the next entry
                continue
import csv
import string 
import legislators_current as leg
import dump
fecs = {}
legs= leg.load()
for x in sorted(legs['wp'].keys()):
    obj= legs['wp'][x]
    idsobj= obj['id'] 
    name = obj['name']['official_full'] 
    if 'fec' in idsobj:
        for fec in idsobj['fec']:
#            print "fec",fec
            fecs[fec]=obj

filename= "maplight-convert/FEC2012c1_1.csv"
fieldnames=[   "TransactionTypeCode","TransactionType","ElectionCycle","ReportingCommitteeMLID","ReportingCommitteeFECID","ReportingCommitteeName","ReportingCommitteeNameNormalized","PrimaryGeneralIndicator","TransactionID","FileNumber","RecordNumberML","RecordNumberFEC","TransactionDate","TransactionAmount","RecipientName","RecipientNameNormalized","RecipientCity","RecipientState","RecipientZipCode","RecipientEmployer","RecipientEmployerNormalized","RecipientOccupation","RecipientOccupationNormalized","RecipientOrganization","RecipientEntityTypeCode","RecipientEntityType","RecipientCommitteeMLID","RecipientCommitteeFECID","RecipientCommitteeName","RecipientCommitteeNameNormalized","RecipientCommitteeTreasurer","RecipientCommitteeDesignationCode","RecipientCommitteeDesignation","RecipientCommitteeTypeCode","RecipientCommitteeType","RecipientCommitteeParty","RecipientCandidateMLID","RecipientCandidateFECID","RecipientCandidateName","RecipientCandidateNameNormalized","RecipientCandidateParty","RecipientCandidateICO","RecipientCandidateStatus","RecipientCandidateOfficeState","RecipientCandidateOffice","RecipientCandidateDistrict","RecipientCandidateGender","DonorName","DonorNameNormalized","DonorCity","DonorState","DonorZipCode","DonorEmployer","DonorEmployerNormalized","DonorOccupation","DonorOccupationNormalized","DonorOrganization","DonorEntityTypeCode","DonorEntityType","DonorCommitteeMLID","DonorCommitteeFECID","DonorCommitteeName","DonorCommitteeNameNormalized","DonorCommitteeTreasurer","DonorCommitteeDesignationCode","DonorCommitteeDesignation","DonorCommitteeTypeCode","DonorCommitteeType","DonorCommitteeParty","DonorCandidateMLID","DonorCandidateFECID","DonorCandidateName","DonorCandidateNameNormalized","DonorCandidateParty","DonorCandidateICO","DonorCandidateStatus","DonorCandidateOfficeState","DonorCandidateOffice","DonorCandidateDistrict","DonorCandidateGender","UpdateTimestamp" ]

fec_dict_reader = csv.DictReader(open(filename), delimiter=',', quotechar='"', restkey=100, fieldnames=fieldnames)
from collections import defaultdict
matrix = defaultdict(dict)
#print fec_dict_reader.fieldnames

f = open(filename + ".xml", 'w')
for line in fec_dict_reader:
    for k in fieldnames :
        d = line[k]
        if (isinstance( d, int )):
            d= str(d)
        if d is not None  and d != '':           
            if string.find(k,"FECID") > 0:
def loadlegs():
    """Return whatever ``leg.load()`` returns."""
    legislators = leg.load()
    return legislators
def loadlegs():
    """Load the legislator data by delegating to ``leg.load()``."""
    loaded = leg.load()
    return loaded