Exemplo n.º 1
0
def load_data():
    """Rebuild ``interest_group_bill_support`` from the MapLight CSV export.

    Inside a single ``db.transaction()`` the table is emptied and then
    refilled row by row from
    ``../data/crawl/maplight/uniq_map_export_bill_research.csv``.  For each
    data row the function:

    * skips blank rows, rows whose first field starts with ``#``, and rows
      whose support code maps to 0 ("not known");
    * looks up the interest group by ``longname`` and inserts it when absent;
    * loads the bill through ``bills.loadbill`` when it is not in ``bill``
      yet, otherwise refreshes its ``maplightid``;
    * inserts the (bill, group, support) row.

    Raises:
        KeyError: if a row carries a support code other than '0', '1', '2'.
        ValueError: if a non-blank data row does not have exactly six fields.
        Exception: whatever the final insert raises is re-raised after a
            diagnostic line has been printed.
    """
    # Raw CSV code -> stored value.  Per the README: 0 = oppose,
    # 1 = support, 2 = not known.
    supportdict = {'0': -1, '1': 1, '2': 0}

    # The with-block guarantees the CSV handle is closed even when a database
    # call fails half-way through the import.
    with open('../data/crawl/maplight/uniq_map_export_bill_research.csv') as csvfile:
        with db.transaction():
            db.delete('interest_group_bill_support', '1=1')
            for line in csv.reader(csvfile):
                # csv.reader yields [] for a blank line; indexing it would
                # raise IndexError, so treat it like a comment row.
                if not line or line[0].startswith('#'):
                    continue

                category_id, longname, maplightid, session, measure, support = line
                support = supportdict[support]
                if support == 0:
                    continue
                typenumber = measure.lower().replace(' ', '')

                r = db.select('interest_group', what="id",
                              where="longname=$longname",
                              vars={'longname': longname})
                if r:
                    groupid = r[0].id
                else:
                    # NOTE(review): relies on db.insert returning the new
                    # row's id -- confirm against the db layer in use.
                    groupid = db.insert('interest_group', longname=longname,
                                        category_id=category_id)

                bill_id = 'us/%s/%s' % (session, typenumber)
                r = db.select('bill', where="id=$bill_id",
                              vars={'bill_id': bill_id})
                if not r:
                    filename = "../data/crawl/govtrack/us/%s/bills/%s.xml" % (session, typenumber)
                    bills.loadbill(filename, maplightid=maplightid)
                else:
                    db.update('bill', maplightid=maplightid,
                              where="id=$bill_id", vars={'bill_id': bill_id})

                try:
                    db.insert('interest_group_bill_support', seqname=False,
                              bill_id=bill_id, group_id=groupid, support=support)
                except Exception:
                    # Report which row failed, then let the error propagate
                    # out of the transaction block.
                    print('\n Duplicate row with billid %s groupid %s support %s longname %s'
                          % (bill_id, groupid, support, longname))
                    raise
Exemplo n.º 2
0
def load_data():
    """Reload the interest-group/bill support table from the MapLight CSV.

    The whole import runs in one ``db.transaction()``: the
    ``interest_group_bill_support`` table is cleared, then every usable row
    of ``../data/crawl/maplight/uniq_map_export_bill_research.csv`` is
    turned into one support record.  Blank rows, rows starting with ``#`` and
    rows whose support code means "not known" are ignored.  Interest groups
    missing from ``interest_group`` are inserted; bills missing from ``bill``
    are loaded through ``bills.loadbill``, existing ones get their
    ``maplightid`` updated.

    Raises:
        KeyError: for a support code other than '0', '1' or '2'.
        ValueError: when a non-blank data row does not unpack into six fields.
        Exception: any failure of the final insert, re-raised after printing
            which row was being written.
    """
    # 0: oppose ; 1: support; 2: not known (from README)
    supportdict = {
        '0': -1,
        '1': 1,
        '2': 0,
    }
    csv_path = '../data/crawl/maplight/uniq_map_export_bill_research.csv'

    # Opening the file in a with-block closes the handle on every exit path;
    # previously it was left open for the garbage collector.
    with open(csv_path) as source:
        with db.transaction():
            db.delete('interest_group_bill_support', '1=1')
            for line in csv.reader(source):
                # An empty line is parsed as [], which has no line[0].
                if not line:
                    continue
                if line[0].startswith('#'):
                    continue

                (category_id, longname, maplightid, session, measure,
                 support) = line
                support = supportdict[support]
                if support == 0:
                    continue
                typenumber = measure.lower().replace(' ', '')

                r = db.select('interest_group',
                              what="id",
                              where="longname=$longname",
                              vars={'longname': longname})
                if r:
                    groupid = r[0].id
                else:
                    # NOTE(review): assumes db.insert hands back the id of
                    # the inserted row -- verify against the db layer.
                    groupid = db.insert('interest_group',
                                        longname=longname,
                                        category_id=category_id)

                bill_id = 'us/%s/%s' % (session, typenumber)
                bill_vars = {'bill_id': bill_id}
                r = db.select('bill', where="id=$bill_id", vars=bill_vars)
                if not r:
                    filename = "../data/crawl/govtrack/us/%s/bills/%s.xml" % (
                        session, typenumber)
                    bills.loadbill(filename, maplightid=maplightid)
                else:
                    db.update('bill',
                              maplightid=maplightid,
                              where="id=$bill_id",
                              vars=bill_vars)

                try:
                    db.insert('interest_group_bill_support',
                              seqname=False,
                              bill_id=bill_id,
                              group_id=groupid,
                              support=support)
                except Exception:
                    # Say which row was being written before propagating.
                    print('\n Duplicate row with billid %s groupid %s support %s longname %s' % (
                        bill_id, groupid, support, longname))
                    raise
Exemplo n.º 3
0
def get_dist(zip5):
    """Return ``{district_id: fraction}`` for the given ZIP (or state) code.

    If ``zip5`` matches a ``state.code`` the result is ``{zip5: 1.0}``.
    Otherwise the ``zip4`` rows with ``zip = zip5`` are counted per
    ``district_id`` and each district receives its share of the total count.
    A code with no ``zip4`` rows yields an empty dict.
    """
    lookup = {'zip5': zip5}
    if db.select('state', where='code=$zip5', vars=lookup):
        return {zip5: 1.0}

    per_district = db.select(
        'zip4',
        what='COUNT(plus4), district_id',
        where='zip=$zip5',
        group='district_id',
        vars=lookup,
    ).list()

    # NOTE(review): the COUNT(plus4) column is read back as ``.count`` --
    # presumably the name the database gives an unaliased COUNT; confirm.
    total = float(sum(row.count for row in per_district))
    return dict(
        (row.district_id, float(row.count) / total) for row in per_district
    )
Exemplo n.º 4
0
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Store contributions and expenditures found in FEC e-filing files.

    Every path matching ``filepattern`` is passed to
    ``fec_crude_csv.parse_efilings``, which yields ``(filing, schedules)``
    pairs.  A schedule typed 'contribution' becomes a row in
    ``contribution``, one typed 'expenditure' a row in ``expenditure``; any
    other schedule is reported on stdout and skipped.
    """
    paths = glob.glob(filepattern)
    for filing, schedules in fec_crude_csv.parse_efilings(paths):
        for sched in schedules:
            kind = sched.get('type')
            if kind == 'expenditure':
                db.insert('expenditure',
                          candidate_name=filing.get('candidate'),
                          committee=filing['committee'],
                          expenditure_date=sched['date'],
                          recipient=sched['recipient'],
                          filer_id=filing['filer_id'],
                          report_id=filing['report_id'],
                          amount=sched['amount'])
            elif kind != 'contribution':
                print("ignoring record of type %s" %
                      sched['original_data'].get('form_type'))
            else:
                # XXX politician_id is worked out below but never stored, so
                # the lookups currently have no effect on what is inserted.
                politician_id = None
                fec_id = filing.get('candidate_fec_id')
                if fec_id:
                    by_fec_id = list(
                        db.select('politician_fec_ids',
                                  where='fec_id=$fec_id',
                                  vars={'fec_id': fec_id}))
                    if len(by_fec_id) == 1:
                        politician_id = by_fec_id[0].politician_id
                elif filing.get('candidate'):
                    # The name lookup is only tried when the filing has no
                    # candidate FEC id at all.
                    name_parts = filing['candidate'].split(' ')
                    by_name = list(
                        db.select('politician',
                                  where='lastname=$ln and firstname=$fn',
                                  vars={'fn': name_parts[0],
                                        'ln': name_parts[-1]}))
                    if len(by_name) == 1:
                        politician_id = by_name[0].id
                db.insert('contribution',
                          committee=filing['committee'],
                          contrib_date=sched['date'],
                          contributor_org=sched.get('contributor_org'),
                          contributor=sched['contributor'],
                          occupation=sched['occupation'],
                          employer=sched['employer'],
                          employer_stem=tools.stemcorpname(sched['employer']),
                          candidate_name=filing.get('candidate'),
                          filer_id=filing['filer_id'],
                          report_id=filing['report_id'],
                          amount=sched['amount'])
Exemplo n.º 5
0
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Load FEC e-filings matching ``filepattern`` into the database.

    ``fec_crude_csv.parse_efilings`` is given the globbed file list and
    yields ``(filing, schedules)`` pairs.  Each schedule is routed by its
    'type': 'contribution' -> ``contribution`` table, 'expenditure' ->
    ``expenditure`` table, anything else -> a message on stdout.
    """
    efilings = fec_crude_csv.parse_efilings(glob.glob(filepattern))
    for report, schedules in efilings:
        for entry in schedules:
            entry_type = entry.get('type')

            if entry_type == 'contribution':
                # XXX the politician_id resolution below is dead code: the
                # value is computed and then never used.
                politician_id = None
                candidate_fec_id = report.get('candidate_fec_id')
                if candidate_fec_id:
                    id_rows = list(db.select(
                        'politician_fec_ids',
                        where='fec_id=$fec_id',
                        vars={'fec_id': candidate_fec_id}))
                    if len(id_rows) == 1:
                        politician_id = id_rows[0].politician_id
                elif report.get('candidate'):
                    # Reached only when there is no candidate FEC id.
                    words = report['candidate'].split(' ')
                    first, last = words[0], words[-1]
                    name_rows = list(db.select(
                        'politician',
                        where='lastname=$ln and firstname=$fn',
                        vars={'ln': last, 'fn': first}))
                    if len(name_rows) == 1:
                        politician_id = name_rows[0].id

                db.insert('contribution',
                          committee=report['committee'],
                          contrib_date=entry['date'],
                          contributor_org=entry.get('contributor_org'),
                          contributor=entry['contributor'],
                          occupation=entry['occupation'],
                          employer=entry['employer'],
                          employer_stem=tools.stemcorpname(entry['employer']),
                          candidate_name=report.get('candidate'),
                          filer_id=report['filer_id'],
                          report_id=report['report_id'],
                          amount=entry['amount'])
                continue

            if entry_type == 'expenditure':
                db.insert('expenditure',
                          candidate_name=report.get('candidate'),
                          committee=report['committee'],
                          expenditure_date=entry['date'],
                          recipient=entry['recipient'],
                          filer_id=report['filer_id'],
                          report_id=report['report_id'],
                          amount=entry['amount'])
                continue

            form_type = entry['original_data'].get('form_type')
            print("ignoring record of type %s" % form_type)
Exemplo n.º 6
0
def main():
    """Wipe and reload all bills (and their votes) from the govtrack crawl.

    Runs inside one ``db.transaction()``:

    1. delete every row of ``vote``;
    2. delete the ``interest_group_bill_support`` rows that point at an
       existing bill;
    3. delete every row of ``bill``;
    4. call ``loadbill`` for each ``../data/crawl/govtrack/us/*/bills/*.xml``.
    """
    with db.transaction():
        db.delete("vote", "1=1")
        # The ids used to be joined into one comma-separated string and bound
        # through $bill_ids, which sends the whole string as a single value
        # ("bill_id in ('a, b, c')") and so matches nothing once there is
        # more than one bill.  A subquery selects the same rows without
        # pulling the ids into Python.
        db.delete("interest_group_bill_support",
                  where="bill_id in (select id from bill)")
        db.delete("bill", "1=1")
        for fn in glob.glob("../data/crawl/govtrack/us/*/bills/*.xml"):
            loadbill(fn)
Exemplo n.º 7
0
def check_user():
    """Interactively assign a new level to each user returned for level 0.

    For every row from ``db.select('user', limitation={'level': 0})`` the row
    is printed, a new level is read from stdin, and the row identified by the
    user's ``wechat`` value is updated with that level.

    Raises:
        ValueError: if the entered text is not an integer (from ``int``).
        AssertionError: if the entered level is outside 0..100.
    """
    for user in db.select('user', limitation={'level': 0}):
        print(user)
        n_level = int(input('new level: '))
        # Checked with an explicit raise instead of an ``assert`` statement:
        # asserts are removed under ``python -O``, which would let an
        # out-of-range level reach the database.  AssertionError is kept so
        # existing callers see the same exception type.
        if not 0 <= n_level <= 100:
            raise AssertionError(
                'level must be between 0 and 100, got %d' % n_level)
        db.upd_row('user',
                   limitation={'wechat': user['wechat']},
                   data={'level': n_level})
Exemplo n.º 8
0
def load_fec_efilings():
    """Insert every schedule of every parsed FEC e-filing into the database.

    ``fec_csv.parse_efilings()`` yields filings; each filing's 'schedules'
    are walked in order.  A schedule whose 'type' is 'contribution' is
    written to ``contribution``; every other schedule is written to
    ``expenditure``.
    """
    for filing in fec_csv.parse_efilings():
        for sched in filing['schedules']:
            if sched['type'] != 'contribution':
                db.insert('expenditure',
                          candidate_name=filing['candidate'],
                          committee=filing['committee'],
                          expenditure_date=sched['date'],
                          recipient=sched['recipient'],
                          filer_id=filing['filer_id'],
                          report_id=filing['report_id'],
                          amount=sched['amount'])
                continue

            # politician_id is resolved but not passed to the insert below.
            politician_id = None
            fec_id = filing['candidate_fec_id']
            if fec_id:
                fec_rows = list(db.select('politician_fec_ids',
                                          where='fec_id=$fec_id',
                                          vars={'fec_id': fec_id}))
                if len(fec_rows) == 1:
                    politician_id = fec_rows[0].politician_id
            elif filing['candidate']:
                # Only consulted when the filing has no candidate FEC id.
                pieces = filing['candidate'].split(' ')
                name_rows = list(db.select('politician',
                                           where='lastname=$ln and firstname=$fn',
                                           vars={'fn': pieces[0], 'ln': pieces[-1]}))
                if len(name_rows) == 1:
                    politician_id = name_rows[0].id

            db.insert('contribution',
                      committee=filing['committee'],
                      contrib_date=sched['date'],
                      contributor_org=sched['contributor_org'],
                      contributor=sched['contributor'],
                      occupation=sched['occupation'],
                      employer=sched['employer'],
                      employer_stem=tools.stemcorpname(sched['employer']),
                      candidate_name=filing['candidate'],
                      filer_id=filing['filer_id'],
                      report_id=filing['report_id'],
                      amount=sched['amount'])
Exemplo n.º 9
0
def loadbill(fn, maplightid=None):
    """Insert one bill, and the roll-call votes attached to it, into the database.

    The bill XML at ``fn`` is parsed with ``xmltramp.load`` and converted to a
    column dict by ``bill2dict``; that dict is inserted into ``bill``.  For
    each of the bill's "vote" actions that carries a roll number, the
    matching roll-call XML is loaded, yeas and nays are tallied, a ``vote``
    row is inserted (or updated) for every voter ``govtrackp`` can resolve,
    and the tallies are written back to the ``bill`` row.

    Args:
        fn: path of the bill XML file.
        maplightid: optional value stored in the bill's ``maplightid``
            column; ignored when falsy.
    """
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    if maplightid:
        d["maplightid"] = maplightid
    db.insert("bill", seqname=False, **d)
    # Progress output: the leading "\r" plus the trailing comma (Python 2
    # print statement, no newline) rewrites the same console line each time.
    print "\r  %-25s" % d["id"],
    sys.stdout.flush()

    # Values of the "where" attribute already handled; only the first roll
    # call seen for each one is processed.
    done = []
    for vote in bill.actions["vote":]:
        # Skip vote actions that have no roll-call number.
        if not vote().get("roll"):
            continue
        if vote("where") in done:
            continue  # don't count veto overrides
        done.append(vote("where"))

        # Relative path of the roll-call file: <session>/rolls/<where><year>-<roll>.xml
        votedoc = "%s/rolls/%s%s-%s.xml" % (d["session"], vote("where"), vote("datetime")[:4], vote("roll"))
        # NOTE: `vote` is rebound here from the bill's vote action to the
        # parsed roll-call document.
        vote = xmltramp.load("../data/crawl/govtrack/us/" + votedoc)
        yeas = 0
        neas = 0
        for voter in vote["voter":]:
            # NOTE(review): fixvote presumably maps the raw vote to 1 (yea) /
            # -1 (nay) / something else -- confirm against its definition.
            if fixvote(voter("vote")) == 1:
                yeas += 1
            elif fixvote(voter("vote")) == -1:
                neas += 1
            rep = govtrackp(voter("id"))
            # Voters that govtrackp cannot resolve are counted in the tallies
            # above but get no row in `vote`.
            if rep:
                # UGLY HACK: if a politician (bob_menendez for instance) voted
                # for the same bill in both chambers of congress the insert
                # fails, so check for an existing row and update it instead.
                # vars=locals() supplies `d` and `rep` to the $-placeholders;
                # those local names must stay as they are.
                if not db.select("vote", where="bill_id=$d['id'] AND politician_id=$rep", vars=locals()):
                    db.insert("vote", seqname=False, politician_id=rep, bill_id=d["id"], vote=fixvote(voter("vote")))
                else:
                    # Bare print ends the "\r" progress line before the message.
                    print
                    print "Updating:", votedoc, rep, d["id"], fixvote(voter("vote"))
                    db.update(
                        "vote",
                        where="bill_id=$d['id'] AND politician_id=$rep",
                        vote=fixvote(voter("vote")),
                        vars=locals(),
                    )
        # Runs once per processed roll call, so the bill row ends up holding
        # the tallies of the last roll call handled.
        db.update("bill", where="id = $d['id']", yeas=yeas, neas=neas, vars=locals())