Пример #1
0
def load():
    outdb = {}
    done = set()
    with db.transaction():
        db.delete('earmark_sponsor', '1=1')
        db.delete('earmark', '1=1')
        for e in earmarks.parse_file(earmarks.EARMARK_FILE):
            de = dict(e)
            de['id'] = web.intget(de['id'])
            if not de['id'] or de['id'] in done: continue # missing the ID? come on!
            if isinstance(de['house_request'], basestring): continue # CLASSIFIED

            for k in de: de[k] = cleanrow(de[k])
            for x in ['house_member', 'house_state', 'house_party', 'senate_member', 'senate_state', 'senate_party', 'district']:
                de.pop(x)
            
            de['recipient_stem'] = tools.stemcorpname(de['intended_recipient'])
            try:
                db.insert('earmark', seqname=False, **de)
            except:
                pprint(de)
                raise
            done.add(de['id'])
        
    reps_not_found = set()
    for e in earmarks.parse_file(earmarks.EARMARK_FILE):
        for rawRequest, chamber in zip([e.house_request, e.senate_request],[e.house_member, e.senate_member]):
            for rep in chamber:
                if rep.lower() not in lastname2rep:
                    #@@ should work on improving quality
                    reps_not_found.add(rep)
                else:
                    rep = lastname2rep[rep.lower()]
                    if e.id in done: 
                        try:
                            db.insert('earmark_sponsor', seqname=False, earmark_id=e.id, politician_id=rep)
                        except:
                            print "Couldn't add %s as sponsor to earmark %d" %(rep, e.id)
                    outdb.setdefault(rep, {
                      'amt_earmark_requested': 0,
                      'n_earmark_requested': 0,
                      'n_earmark_received': 0,
                      'amt_earmark_received': 0
                    })
                    outdb[rep]['n_earmark_requested'] += 1
                    requested = rawRequest or e.final_amt
                    if not isinstance(requested, float):
                        requested = e.final_amt
                    if requested:
                        outdb[rep]['amt_earmark_requested'] += requested
                    if isinstance(e.final_amt, float) and e.final_amt:
                        outdb[rep]['n_earmark_received'] += 1
                        outdb[rep]['amt_earmark_received'] += e.final_amt
    
    print "Did not find",len(reps_not_found),"reps:", pformat(reps_not_found)
    for rep, d in outdb.iteritems():
        db.update('politician', where='id=$rep', vars=locals(), **d)
Пример #2
0
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    for f, schedules in fec_crude_csv.parse_efilings(glob.glob(filepattern)):
        for s in schedules:
            if s.get('type') == 'contribution':
                # XXX all this code for politician_id is currently
                # dead, does nothing useful
                politician_id = None
                if f.get('candidate_fec_id'):
                    fec_id = f['candidate_fec_id']
                    pol_fec_id = list(
                        db.select('politician_fec_ids',
                                  where='fec_id=$fec_id',
                                  vars=locals()))
                    if pol_fec_id and len(pol_fec_id) == 1:
                        politician_id = pol_fec_id[0].politician_id
                elif not politician_id and f.get('candidate'):
                    names = f['candidate'].split(' ')
                    fn, ln = names[0], names[-1]
                    pol = list(
                        db.select('politician',
                                  where='lastname=$ln and firstname=$fn',
                                  vars=locals()))
                    if pol and len(pol) == 1:
                        politician_id = pol[0].id
                db.insert('contribution',
                          committee=f['committee'],
                          contrib_date=s['date'],
                          contributor_org=s.get('contributor_org'),
                          contributor=s['contributor'],
                          occupation=s['occupation'],
                          employer=s['employer'],
                          employer_stem=tools.stemcorpname(s['employer']),
                          candidate_name=f.get('candidate'),
                          filer_id=f['filer_id'],
                          report_id=f['report_id'],
                          amount=s['amount'])
            elif s.get('type') == 'expenditure':
                db.insert('expenditure',
                          candidate_name=f.get('candidate'),
                          committee=f['committee'],
                          expenditure_date=s['date'],
                          recipient=s['recipient'],
                          filer_id=f['filer_id'],
                          report_id=f['report_id'],
                          amount=s['amount'])
            else:
                print "ignoring record of type %s" % \
                      s['original_data'].get('form_type')
Пример #3
0
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    for f, schedules in fec_crude_csv.parse_efilings(glob.glob(filepattern)):
        for s in schedules:
            if s.get('type') == 'contribution':
                # XXX all this code for politician_id is currently
                # dead, does nothing useful
                politician_id = None
                if f.get('candidate_fec_id'):
                    fec_id = f['candidate_fec_id']
                    pol_fec_id = list(db.select('politician_fec_ids',
                                                where='fec_id=$fec_id',
                                                vars=locals()))
                    if pol_fec_id and len(pol_fec_id) == 1:
                        politician_id = pol_fec_id[0].politician_id
                elif not politician_id and f.get('candidate'):
                    names = f['candidate'].split(' ')
                    fn, ln = names[0], names[-1]
                    pol = list(db.select('politician',
                                        where='lastname=$ln and firstname=$fn',
                                        vars=locals()))
                    if pol and len(pol) == 1:
                        politician_id = pol[0].id
                db.insert('contribution',
                          committee=f['committee'],
                          contrib_date=s['date'],
                          contributor_org=s.get('contributor_org'),
                          contributor=s['contributor'],
                          occupation=s['occupation'],
                          employer=s['employer'],
                          employer_stem=tools.stemcorpname(s['employer']),
                          candidate_name=f.get('candidate'),
                          filer_id=f['filer_id'],
                          report_id=f['report_id'],
                          amount=s['amount'])
            elif s.get('type') == 'expenditure':
                db.insert('expenditure',
                          candidate_name=f.get('candidate'),
                          committee=f['committee'],
                          expenditure_date=s['date'],
                          recipient=s['recipient'],
                          filer_id=f['filer_id'],
                          report_id=f['report_id'],
                          amount=s['amount'])
            else:
                print "ignoring record of type %s" % \
                      s['original_data'].get('form_type')
Пример #4
0
def load_fec_contributions():
    t = db.transaction(); n = 0
    db.delete('contribution', '1=1')
    for f in fec_cobol.parse_contributions():
        f = web.storage(f)
        f.occupation = f.occupation.replace('N/A', '')
        if '/' in f.occupation:
            employer, occupation = f.occupation.split('/', 1)
        else:
            employer = ''
            occupation = f.occupation
        
        try:
            datetime.date(*[int(x) for x in f.date.split('-')])
        except ValueError:
            f.date = None
        
        db.insert('contribution',
          fec_record_id = f.get('fec_record_id'),
          microfilm_loc = f.microfilm_loc,
          recipient_id = f.filer_id,
          name = f.name,
          street = f.get('street'),
          city = f.city,
          state = f.state,
          zip = f.zip,
          occupation = occupation,
          employer = employer,
          employer_stem = tools.stemcorpname(employer),
          committee = f.from_id or None,
          sent = f.date,
          amount = f.amount
        )
        n += 1
        if n % 10000 == 0: t.commit(); t = db.transaction(); print n
    t.commit()
    print "Creating indexes on table `contribution`..."
    schema.Contribution.create_indexes()
    print "done."
Пример #5
0
def load_fec_contributions():
    t = db.transaction()
    n = 0
    db.delete('contribution', '1=1')
    for f in fec_cobol.parse_contributions():
        f = web.storage(f)
        f.occupation = f.occupation.replace('N/A', '')
        if '/' in f.occupation:
            employer, occupation = f.occupation.split('/', 1)
        else:
            employer = ''
            occupation = f.occupation

        try:
            datetime.date(*[int(x) for x in f.date.split('-')])
        except ValueError:
            f.date = None

        db.insert('contribution',
                  fec_record_id=f.get('fec_record_id'),
                  microfilm_loc=f.microfilm_loc,
                  recipient_id=f.filer_id,
                  name=f.name,
                  street=f.get('street'),
                  city=f.city,
                  state=f.state,
                  zip=f.zip,
                  occupation=occupation,
                  employer=employer,
                  employer_stem=tools.stemcorpname(employer),
                  committee=f.from_id or None,
                  sent=f.date,
                  amount=f.amount)
        n += 1
        if n % 10000 == 0:
            t.commit()
            t = db.transaction()
            print n
    t.commit()
Пример #6
0
def load_fec_efilings():
    """Insert the schedules of every parsed FEC e-filing into the database.

    For each filing from ``fec_csv.parse_efilings()``, a schedule whose
    'type' is 'contribution' becomes a row in ``contribution``; every other
    schedule is inserted into ``expenditure``.
    """
    for filing in fec_csv.parse_efilings():
        for sched in filing['schedules']:
            if s_is_contribution(sched) if False else sched['type'] != 'contribution':
                # Anything that is not a contribution is stored as an
                # expenditure.
                db.insert('expenditure',
                          candidate_name=filing['candidate'],
                          committee=filing['committee'],
                          expenditure_date=sched['date'],
                          recipient=sched['recipient'],
                          filer_id=filing['filer_id'],
                          report_id=filing['report_id'],
                          amount=sched['amount'])
                continue

            # The politician lookup below runs its queries, but the id it
            # finds is not written to the contribution row.
            politician_id = None
            if filing['candidate_fec_id']:
                id_rows = list(db.select(
                    'politician_fec_ids',
                    where='fec_id=$fec_id',
                    vars={'fec_id': filing['candidate_fec_id']}))
                if len(id_rows) == 1:
                    politician_id = id_rows[0].politician_id
            elif filing['candidate']:
                # Reached only when the filing carries no candidate FEC id:
                # match on the first and last word of the candidate name.
                name_parts = filing['candidate'].split(' ')
                pol_rows = list(db.select(
                    'politician',
                    where='lastname=$ln and firstname=$fn',
                    vars={'fn': name_parts[0], 'ln': name_parts[-1]}))
                if len(pol_rows) == 1:
                    politician_id = pol_rows[0].id

            db.insert('contribution',
                      committee=filing['committee'],
                      contrib_date=sched['date'],
                      contributor_org=sched['contributor_org'],
                      contributor=sched['contributor'],
                      occupation=sched['occupation'],
                      employer=sched['employer'],
                      employer_stem=tools.stemcorpname(sched['employer']),
                      candidate_name=filing['candidate'],
                      filer_id=filing['filer_id'],
                      report_id=filing['report_id'],
                      amount=sched['amount'])
Пример #7
0
def load():
    outdb = {}
    done = set()
    with db.transaction():
        db.delete('earmark_sponsor', '1=1')
        db.delete('earmark', '1=1')
        for e in earmarks.parse_file(earmarks.EARMARK_FILE):
            de = dict(e)
            de['id'] = web.intget(de['id'])
            if not de['id'] or de['id'] in done:
                continue  # missing the ID? come on!
            if isinstance(de['house_request'], basestring):
                continue  # CLASSIFIED

            for k in de:
                de[k] = cleanrow(de[k])
            for x in [
                    'house_member', 'house_state', 'house_party',
                    'senate_member', 'senate_state', 'senate_party', 'district'
            ]:
                de.pop(x)

            de['recipient_stem'] = tools.stemcorpname(de['intended_recipient'])
            try:
                db.insert('earmark', seqname=False, **de)
            except:
                pprint(de)
                raise
            done.add(de['id'])

    reps_not_found = set()
    for e in earmarks.parse_file(earmarks.EARMARK_FILE):
        for rawRequest, chamber in zip([e.house_request, e.senate_request],
                                       [e.house_member, e.senate_member]):
            for rep in chamber:
                if rep.lower() not in lastname2rep:
                    #@@ should work on improving quality
                    reps_not_found.add(rep)
                else:
                    rep = lastname2rep[rep.lower()]
                    if e.id in done:
                        try:
                            db.insert('earmark_sponsor',
                                      seqname=False,
                                      earmark_id=e.id,
                                      politician_id=rep)
                        except:
                            print "Couldn't add %s as sponsor to earmark %d" % (
                                rep, e.id)
                    outdb.setdefault(
                        rep, {
                            'amt_earmark_requested': 0,
                            'n_earmark_requested': 0,
                            'n_earmark_received': 0,
                            'amt_earmark_received': 0
                        })
                    outdb[rep]['n_earmark_requested'] += 1
                    requested = rawRequest or e.final_amt
                    if not isinstance(requested, float):
                        requested = e.final_amt
                    if requested:
                        outdb[rep]['amt_earmark_requested'] += requested
                    if isinstance(e.final_amt, float) and e.final_amt:
                        outdb[rep]['n_earmark_received'] += 1
                        outdb[rep]['amt_earmark_received'] += e.final_amt

    print "Did not find", len(reps_not_found), "reps:", pformat(reps_not_found)
    for rep, d in outdb.iteritems():
        db.update('politician', where='id=$rep', vars=locals(), **d)