Exemplo n.º 1
0
def analyze():
    
    c = Cube()

    print 'analyzing pickled cfda awards data...'
    
    print 'initalizing...'
    
    cfda_agency_map = {}
    
    for program in Program.objects.all():
    
        cfda_agency_map[program.program_number] = program.agency_id
    
    print 'loading data...'
    
    f = open(DATA_DIR + 'cfda_awards.out.bin', 'rb')
    awards = pickle.load(f)
    f.close()
    
    print 'building cube...'
    
    for award_id in awards:
         
        award = awards[award_id]
        
        cfda = award['cfda']
        
        if cfda_agency_map.has_key(cfda):
         
            agency = cfda_agency_map[cfda]
            
            fed_amount = award['fed_amount']
            fiscal_year  = award['fiscal_year']
            reporting_lag = award['reporting_lag']
            fiscal_year_lag = award['fiscal_year_lag']
            
            # reporting lag of negative days converted to 0
            reporting_lag = reporting_lag if reporting_lag > 0 else 0
            
            # select only fy 2007-2009 inclusive 
            if fiscal_year > 2009 or fiscal_year < 2007:
                continue
            
            # filter transactions that occur later than 180 days after the end of the fiscal year
            # this allows us to make a comparison between 2007/2008 and 2009. 
            if fiscal_year_lag > 180:
                continue 
            
            # add record to data cube
            c.add({'fy':fiscal_year, 'cfda':cfda, 'agency':agency}, {'days':reporting_lag, 'dollars':fed_amount})
    
    awards = None 
    
    print 'querying cfda aggregates...'
    
    result = c.query(groups=['cfda','fy'])
    
    print 'loading cfda results into db...'
    
    ProgramTimeliness.objects.all().delete()

    for cfda in result.values:
        
        program = Program.objects.get(program_number=cfda)
        
        for fy in range(2007, 2010):
            
            metric = ProgramTimeliness.objects.create(program=program, 
                                                   agency=program.agency, 
                                                   fiscal_year=fy,
                                                   late_dollars=result.values[cfda].values[fy].get_data(aggregator=sum_dollars_45days_late),
                                                   late_rows=result.values[cfda].values[fy].get_data(aggregator=count_records_45days_late),
                                                   total_dollars=result.values[cfda].values[fy].get_data(aggregator=sum_dollars),
                                                   total_rows=result.values[cfda].values[fy].get_data(aggregator=len),
                                                   avg_lag_rows=result.values[cfda].values[fy].get_data(aggregator=avg_days_by_awards),
                                                   avg_lag_dollars=result.values[cfda].values[fy].get_data(aggregator=avg_days_by_dollars))
    
            metric.save()
            
            
    print 'querying agency aggregates...' 
     
    result = c.query(groups=['agency','fy'])
    
    print 'loading agency results into db...'
    
    AgencyTimeliness.objects.all().delete()

    for agency_id in result.values:
        
        agency = Agency.objects.get(pk=agency_id)
        
        for fy in range(2007, 2010):
            
            metric = AgencyTimeliness.objects.create(agency=agency, 
                                                   fiscal_year=fy,
                                                   late_dollars=result.values[agency_id].values[fy].get_data(aggregator=sum_dollars_45days_late),
                                                   late_rows=result.values[agency_id].values[fy].get_data(aggregator=count_records_45days_late),
                                                   total_dollars=result.values[agency_id].values[fy].get_data(aggregator=sum_dollars),
                                                   total_rows=result.values[agency_id].values[fy].get_data(aggregator=len),
                                                   avg_lag_rows=result.values[agency_id].values[fy].get_data(aggregator=avg_days_by_awards),
                                                   avg_lag_dollars=result.values[agency_id].values[fy].get_data(aggregator=avg_days_by_dollars))
    
            metric.save()
Exemplo n.º 2
0
def analyzer_main():
    print 'Loading data...'
    cfda_agency_map = dict(
        (p.program_number, p.agency_id) for p in Program.objects.all())

    awards = unpickle(os.path.join(DATA_DIR, 'cfda_awards.out.bin'))

    print 'Building cube'
    c = Cube()
    for (idx, (award_id, award)) in enumerate(awards.iteritems()):
        # Simple progress ticker
        if idx % 1000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        cfda = award['cfda']
        agency = cfda_agency_map.get(cfda, None)
        if agency:
            fed_amount = award['fed_amount']
            fiscal_year = award['fiscal_year']
            reporting_lag = award['reporting_lag']
            fiscal_year_lag = award['fiscal_year_lag']

            # select only fy 2007-2009 inclusive
            if fiscal_year not in FISCAL_YEARS:
                continue

            # We need to set an upper bound on the fiscal year lag in order to
            # make comparisons between fiscal years useful.
            if fiscal_year_lag > FISCAL_YEAR_LAG_THRESHOLD:
                continue

            # reporting lag of negative days converted to 0
            reporting_lag = reporting_lag if reporting_lag > 0 else 0

            # add record to data cube
            c.add({
                'fy': fiscal_year,
                'cfda': cfda,
                'agency': agency
            }, {
                'days': reporting_lag,
                'dollars': fed_amount
            })

    awards = None

    print 'Querying cfda aggregates...'
    result = c.query(groups=['cfda', 'fy'])

    print 'Loading cfda results into db...'
    ProgramTimeliness.objects.all().delete()

    for (cfda, cfda_results) in result.values.iteritems():
        program = Program.objects.get(program_number=cfda)

        for fy in FISCAL_YEARS:
            cfda_fy_results = cfda_results.values[fy]
            metric = ProgramTimeliness.objects.create(
                program=program,
                agency=program.agency,
                fiscal_year=fy,
                late_dollars=cfda_fy_results.get_data(sum_dollars_45days_late),
                late_rows=cfda_fy_results.get_data(count_records_45days_late),
                total_dollars=cfda_fy_results.get_data(sum_dollars),
                total_rows=cfda_fy_results.get_data(len),
                avg_lag_rows=cfda_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=cfda_fy_results.get_data(avg_days_by_dollars))
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
            metric.save()

    print 'Querying agency aggregates...'
    result = c.query(groups=['agency', 'fy'])

    print 'Loading agency results into db...'
    AgencyTimeliness.objects.all().delete()

    for (agency_id, agency_results) in result.values.iteritems():
        agency = Agency.objects.get(pk=agency_id)

        for fy in FISCAL_YEARS:
            agency_fy_results = agency_results.values[fy]
            metric = AgencyTimeliness.objects.create(
                agency=agency,
                fiscal_year=fy,
                late_dollars=agency_fy_results.get_data(
                    sum_dollars_45days_late),
                late_rows=agency_fy_results.get_data(
                    count_records_45days_late),
                total_dollars=agency_fy_results.get_data(sum_dollars),
                total_rows=agency_fy_results.get_data(len),
                avg_lag_rows=agency_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=agency_fy_results.get_data(
                    avg_days_by_dollars))
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
            metric.save()
Exemplo n.º 3
0
def analyzer_main():
    print 'Loading data...'
    cfda_agency_map = dict((p.program_number, p.agency_id) 
                           for p in Program.objects.all())
   
    awards = unpickle(os.path.join(DATA_DIR, 'cfda_awards.out.bin'))
    
    print 'Building cube'
    c = Cube()
    for (idx, (award_id, award)) in enumerate(awards.iteritems()):
        # Simple progress ticker
        if idx % 1000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        cfda = award['cfda']
        agency = cfda_agency_map.get(cfda, None)
        if agency:
            fed_amount = award['fed_amount']
            fiscal_year  = award['fiscal_year']
            reporting_lag = award['reporting_lag']
            fiscal_year_lag = award['fiscal_year_lag']
            
            # select only fy 2007-2009 inclusive 
            if fiscal_year not in FISCAL_YEARS:
                continue

            # We need to set an upper bound on the fiscal year lag in order to
            # make comparisons between fiscal years useful.
            if fiscal_year_lag > FISCAL_YEAR_LAG_THRESHOLD:
                continue 
            
            # reporting lag of negative days converted to 0
            reporting_lag = reporting_lag if reporting_lag > 0 else 0
            
            # add record to data cube
            c.add({'fy':fiscal_year, 'cfda':cfda, 'agency':agency}, {'days':reporting_lag, 'dollars':fed_amount})


    awards = None 

    print 'Querying cfda aggregates...'
    result = c.query(groups=['cfda','fy'])
    
    print 'Loading cfda results into db...'
    ProgramTimeliness.objects.all().delete()

    for (cfda, cfda_results) in result.values.iteritems():
        program = Program.objects.get(program_number=cfda)
        
        for fy in FISCAL_YEARS:
            cfda_fy_results = cfda_results.values[fy]
            metric = ProgramTimeliness.objects.create(
                program=program, 
                agency=program.agency, 
                fiscal_year=fy,
                late_dollars=cfda_fy_results.get_data(sum_dollars_45days_late),
                late_rows=cfda_fy_results.get_data(count_records_45days_late),
                total_dollars=cfda_fy_results.get_data(sum_dollars),
                total_rows=cfda_fy_results.get_data(len),
                avg_lag_rows=cfda_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=cfda_fy_results.get_data(avg_days_by_dollars)
            )
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
            metric.save()
            
            
    print 'Querying agency aggregates...' 
    result = c.query(groups=['agency','fy'])
    
    print 'Loading agency results into db...'
    AgencyTimeliness.objects.all().delete()

    for (agency_id, agency_results) in result.values.iteritems():
        agency = Agency.objects.get(pk=agency_id)
        
        for fy in FISCAL_YEARS:
            agency_fy_results = agency_results.values[fy]
            metric = AgencyTimeliness.objects.create(
                agency=agency, 
                fiscal_year=fy,
                late_dollars=agency_fy_results.get_data(sum_dollars_45days_late),
                late_rows=agency_fy_results.get_data(count_records_45days_late),
                total_dollars=agency_fy_results.get_data(sum_dollars),
                total_rows=agency_fy_results.get_data(len),
                avg_lag_rows=agency_fy_results.get_data(avg_days_by_awards),
                avg_lag_dollars=agency_fy_results.get_data(avg_days_by_dollars)
            )
            if metric.total_dollars > 0:
                metric.late_pct = metric.late_dollars * 100 / metric.total_dollars
            metric.save()