Пример #1
0
def main():
    """Scrape every almanac person page and refresh the `district` table.

    Election results are loaded for each page found under ALMANAC_DIR.  The
    demographic fields and the almanac link are only written for pages whose
    path contains ALMANAC_DIR + '2008'.
    """
    assert os.path.exists(ALMANAC_DIR), ALMANAC_DIR

    files = []
    for pattern in ('*/people/*/rep_*.htm', '*/people/*/*s[12].htm'):
        files.extend(glob.glob(ALMANAC_DIR + pattern))

    for fn in sorted(files):
        # The page's base name, minus '.htm' and 'rep_', begins with the
        # state code and ends with the two-character district number.
        page = fn.split('/')[-1]
        dist = web.lstrips(web.rstrips(page, '.htm'), 'rep_')
        diststate = dist[:2].upper()
        # NOTE: `distname` must keep this name -- the `$distname` placeholder
        # in the update below is looked up in the locals passed as `vars`.
        distname = tools.fixdist('-'.join((diststate, dist[-2:])))

        d = almanac.scrape_person(fn)
        load_election_results(d, distname)

        # Only the 2008 pages feed the district record.
        if ALMANAC_DIR + '2008' not in fn:
            continue

        if 'demographics' in d:
            demog = d['demographics']
        elif distname[-2:] == '00' or '-' not in distname:
            # if -00 then this district is the same as the state.
            state_page = ALMANAC_DIR + '2008/states/%s/index.html' % diststate.lower()
            demog = almanac.scrape_state(state_page).get('state')
        else:
            demog = None

        district = web.storage()
        demog_to_dist(demog, district)

        # Keep everything from the 'nationaljournal.com' host onwards.
        source = d['filename']
        district.almanac = 'http://' + source[source.find('nationaljournal.com'):]

        db.update('district', where='name=$distname', vars=locals(), **district)
Пример #2
0
def main():
    """Scrape every almanac person page and refresh the `district` table.

    For each page found under ALMANAC_DIR this loads the election results,
    derives the demographic fields (from the page itself, or from the state
    page for statewide districts) and writes them, together with the almanac
    link, to the matching `district` row.
    """
    assert os.path.exists(ALMANAC_DIR), ALMANAC_DIR

    files = []
    for pattern in ('people/*/rep_*.htm', 'people/*/*s[12].htm'):
        files.extend(glob.glob(ALMANAC_DIR + pattern))

    for fn in sorted(files):
        # The page's base name, minus '.htm' and 'rep_', begins with the
        # state code and ends with the two-character district number.
        page = fn.split('/')[-1]
        dist = web.lstrips(web.rstrips(page, '.htm'), 'rep_')
        diststate = dist[:2].upper()
        # NOTE: `distname` must keep this name -- the `$distname` placeholder
        # in the update below is looked up in the locals passed as `vars`.
        distname = tools.fixdist('-'.join((diststate, dist[-2:])))

        d = almanac.scrape_person(fn)
        load_election_results(d, distname)

        if 'demographics' in d:
            demog = d['demographics']
        elif distname[-2:] == '00' or '-' not in distname:
            # if -00 then this district is the same as the state.
            state_page = ALMANAC_DIR + 'states/%s/index.html' % diststate.lower()
            demog = almanac.scrape_state(state_page).get('state')
        else:
            demog = None

        district = web.storage()
        demog_to_dist(demog, district)

        # Keep everything from the 'nationaljournal.com' host onwards.
        source = d['filename']
        district.almanac = 'http://' + source[source.find('nationaljournal.com'):]

        db.update('district', where='name=$distname', vars=locals(), **district)
Пример #3
0
def main():
    """Scrape the almanac representative pages into per-district records.

    Returns:
        A dict mapping '<STATE>-<NN>' district names to `web.storage`
        objects.  Each record carries an `almanac` URL plus whichever of
        `cook_index`, `area_sqmi`, `poverty_pct`, `median_income`,
        `est_population_year`, `est_population` and `interest_group_rating`
        the scraped page (or its state page) provided.

    Raises:
        AssertionError: if ALMANAC_DIR does not exist.
    """
    # Open with open() and close the handle explicitly: the previous
    # `file(...)` call leaked the descriptor and `file` is gone in Python 3.
    index_file = open(DATA_DIR + '/load/districts/index.json')
    try:
        districts = simplejson.load(index_file)
    finally:
        index_file.close()

    assert os.path.exists(ALMANAC_DIR), ALMANAC_DIR
    out = {}
    for fn in glob.glob(ALMANAC_DIR + 'people/*/rep*'):
        district = web.storage()

        # The page's base name, minus '.htm' and 'rep_', begins with the
        # state code and ends with the two-character district number.
        dist = web.lstrips(web.rstrips(fn.split('/')[-1], '.htm'), 'rep_')
        diststate = dist[0:2].upper()
        distnum = dist[-2:]

        d = almanac.scrape_person(fn)
        if 'demographics' in d:
            demog = d['demographics']
        else:
            # Fall back to the state-level page when the person page has no
            # demographics of its own.
            #@@ maybe only when diststate + '-00' in districts?
            statefile = ALMANAC_DIR + 'states/%s/index.html' % diststate.lower()
            demog = almanac.scrape_state(statefile).get('state')

        if demog:
            district.cook_index = get_int(demog, 'Cook Partisan Voting Index')
            # NOTE(review): unlike the get_int() lookups, this indexes
            # 'Area size' directly and will raise KeyError if it is absent.
            district.area_sqmi = cleanint(web.rstrips(demog['Area size'], ' sq. mi.'))
            district.poverty_pct = get_int(demog, 'Poverty status')
            district.median_income = get_int(demog, 'Median income')
            (district.est_population_year,
             district.est_population) = coalesce_population(demog, [
                (2006, 'Pop. 2006 (est)'),
                (2005, 'Pop. 2005 (est)'),
                (2000, 'Pop. 2000'),
            ])

        if 'interest_group_rating' in d:
            district.interest_group_rating = d['interest_group_rating']

        # Keep everything from the 'nationaljournal.com' host onwards.
        # NOTE(review): if the host is missing, find() returns -1 and only
        # the last character of the filename is kept -- confirm that every
        # scraped filename contains it.
        district.almanac = 'http://' + d['filename'][d['filename'].find('nationaljournal.com'):]

        # Nationaljournal numbers districts of congressmen-at-large
        # and territorial delegates '01' in its URLs, but our
        # districts file numbers them '00'.
        if distnum == '01' and diststate + '-00' in districts:
            distnum = '00'
        out[diststate + '-' + distnum] = district
    return out