def main(scenarios=scenarios, DB=DB, ROOT_DIR=ROOT_DIR, ZONES=ZONES, map_file=map_file):
    "main entry point - loops over scenarios"
    msg = '{} Starting total cost calculations using input file {}'
    logger.info(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))
    print(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))

    # This isn't the most efficient way to do it, but it's the most transparent: we'll loop
    # through each base:scenario pair. For each, we'll read the input file a line at a time
    # and draw our consumer surplus benefits.
    base_scenario = scenarios[0]
    for s in scenarios[1:]:
        arr_dict = {}

        # grab a reader for the input file and process one line at a time from the setup file
        with open(map_file) as f:
            reader = csv.DictReader(f)
            for line_ix, line in enumerate(reader, start=1):

                # the info comes in as a dict - this cleans up the content, removing comments, etc. Returns a dict.
                dmap = grabinfo(line)

                # these set processing parameters
                transpose = dmap['transpose']   # use transposed trip matrix?
                hov_adj = dmap['hov_adj']       # occupancy adjustment (hov2 implies double time costs, say)
                pct_hb = dmap['pct_hb']         # fraction of benefits accruing to origin node ('home base')

                # these set storage parameters
                arr_name = dmap['rollup_to']    ## updated for total costs
                column_name = arr_name
                table_name = s['name'] + "_" + dmap['rollup_dbtable']   ## updated for total costs

                # unless this line has both a data table and column specified, go to the next line
                if not arr_name or not column_name:
                    continue

                # get information for the base case
                base_dir = base_scenario['location']   # root directory location
                base_name = base_scenario['name']      # name for this scenario

                # Build fully specified path names from locations in mappyings.py; subdirectory
                # determined by file name. Then create np arrays out of them.
                base_cost_file = get_full_filename(location=base_dir, filename=dmap['cost_file'])
                base_trips_file = get_full_filename(location=base_dir, filename=dmap['trip_file'])

                # try to create np arrays from the raw data files; if they don't exist, go on to the next line
                try:
                    base_trips_raw = npa_from_file(base_trips_file)
                    base_cost_per_trip_raw = npa_from_file(base_cost_file)
                except Exception:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    msg = 'Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
                    logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))
                    continue

                # Costs and trips for the base case - returns base costs, base trips as square np
                # arrays w/o OD headers, trips transposed if needed
                base_costs, base_trips = prep_data(base_cost_per_trip_raw, base_trips_raw,
                                                   transpose, hov_adj)

                # Process the scenario costs and trips the same way
                test_dir = s['location']

                # grab the files and put them in np arrays
                test_cost_file = get_full_filename(location=test_dir, filename=dmap['cost_file'])
                test_trip_file = get_full_filename(location=test_dir, filename=dmap['trip_file'])
                try:
                    test_trips_raw = npa_from_file(test_trip_file)
                    test_cost_per_trip_raw = npa_from_file(test_cost_file)
                except Exception:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    msg = 'Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
                    logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))
                    continue

                test_name = s['name']
                test_costs, test_trips = prep_data(cost_per_trip=test_cost_per_trip_raw,
                                                   trips=test_trips_raw,
                                                   transpose=transpose,
                                                   hov_adj=hov_adj)

                # Scenario case trips*cost/trip and trips used. If base and test data are
                # identical, the map file probably points both cases at the same file;
                # warn unless these are FARE files, which may legitimately match.
                if np.equal(test_costs, base_costs).all() and np.isclose(test_trips, base_trips).all():
                    if not "FARE" in test_cost_file and not "FARE" in base_cost_file:
                        msg = "\nWARN: Same file: {} ::: {}"
                        logger.critical(msg.format(base_trips_file, test_trip_file))
                        logger.critical(msg.format(base_cost_file, test_cost_file))
                        logger.critical('\n')

                # With all costs gathered, calculate the change in costs; produces a square np array
                ##cs_delta = get_cs_delta(base_trips, test_trips, base_costs, test_costs)   ## updated for total costs
                total_cost_delta = get_total_cost_delta(base_trips, test_trips, base_costs, test_costs)
                logger.debug('total_cost_delta {}'.format(total_cost_delta.sum()))

                # From the delta matrix, assign benefits to the origin and destination node;
                # produces a vector of nodes w/o OD headers.
                # For home-based transit trips, both outbound and return accrue to the home node,
                # as do am and pm highway trips. For mid-day and night-time highway trips, the
                # benefit is split between origin and dest nodes.
                ## updated for total costs
                #benefits_by_zone = calculate_benefits(cs_delta, pct_hb)
                costs_by_zone = calculate_benefits(total_cost_delta, pct_hb)

                # The balance of this block stores the benefits and other information for
                # posterity/more analysis. We'll aggregate the delta by benefit type, denominated
                # in natural units (minutes, dollars, miles). We can use scalars to transform
                # minutes to dollars, miles to CO2, etc. as a post-processing step.
                # We'll create an aggregation array if requested in the 'aggregate to' column.
                # This bit of code adds the array to arr_dict if needed, then creates a null np
                # array if needed. It adds a column of current benefits_by_zone to the array if
                # the array is null; it increments the array by the benefits just calculated
                # otherwise. To hold the results, we'll make a dict:
                #   arr_dict = {'aggregate_to': {'data': npa_of_data,
                #                                'column': 'aggregate_to',
                #                                'table': 'db_table'}}
                # ...where the 'aggregate_to' value comes from the input spreadsheet and serves
                # as the name of the database column.

                # create the dict entry if needed (it doesn't yet exist)
                if not arr_name in arr_dict:
                    arr_dict[arr_name] = {}
                    arr_dict[arr_name]['data'] = None

                # update the dict with the current state of the roll-up array
                arr_dict[arr_name] = {
                    'data': sum_col_to_np_array(npa=arr_dict[arr_name]['data'],
                                                vector=costs_by_zone,
                                                max_index_val=len(base_trips)),
                    'column': column_name,
                    'table': table_name
                }

                logger.debug('{} -versus- {} line {}\n\t {} \n\t {} \n\t base trips: {} test trips: {} sum dlta costs: {} (summary stats - not used in benefit calcs)'.format(
                    base_name, test_name, line_ix,
                    dmap['trip_file'], dmap['cost_file'].split()[0],
                    np.sum(base_trips), np.sum(test_trips), np.sum(total_cost_delta)))

        # store the arrays in db tables
        store_data(arr_dict=arr_dict, db=DB, zones=ZONES)

    finish = datetime.now()
    msg = 'Finished at {}. Processed {} files in {}.'
    elapsed = str(finish - start).split('.')[0]
    print(msg.format(finish.strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
    logger.info(msg.format(finish.strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
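
# --- Illustrative sketch (not part of the pipeline) --------------------------
# A minimal, self-contained example of the arithmetic main() delegates to
# get_total_cost_delta() and calculate_benefits(). The _sketch_* names are
# hypothetical stand-ins: the real implementations live elsewhere in this
# module, and the origin/destination split in particular is an assumption
# based on the pct_hb comments above, not a copy of the actual code.

import numpy as np

def _sketch_total_cost_delta(base_trips, test_trips, base_costs, test_costs):
    """Element-wise change in total cost (trips * cost-per-trip), test minus base."""
    return test_trips * test_costs - base_trips * base_costs

def _sketch_costs_by_zone(total_cost_delta, pct_hb):
    """Allocate each OD cell's delta between origin and destination zones."""
    orig_share = total_cost_delta.sum(axis=1)   # row sums: delta accruing to origin zones
    dest_share = total_cost_delta.sum(axis=0)   # column sums: delta accruing to destination zones
    return pct_hb * orig_share + (1.0 - pct_hb) * dest_share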
def test_database_updates(self):
    """makes sure that the database storage routine does all of the following:
        - creates a new database if none exists
        - adds new tables as needed
        - adds new columns as needed and populates them with the right data.
    """
    db = 'test_db'

    # test 1: will it store/retrieve a single array?
    create_new_tables = True
    test_data_1 = np.array([[1, 11], [2, 12], [3, 13]])
    column = 'col1'
    table = 'table1'

    # create a fake dict with a bit of data
    arr_dict = {}
    arr_dict['test1'] = {
        'data': test_data_1,
        'column': column,
        'table': table
    }
    analyze_main.store_data(arr_dict=arr_dict, db=db,
                            create_new_tables=create_new_tables, zones=3)

    # make a db connection (let main app create the db if needed)
    conn = psycopg2.connect(database=db,
                            user=login_info['user'],
                            password=login_info['password'])
    curs = conn.cursor()
    curs.execute('SELECT * from {}'.format(table))
    conn.commit()

    # do we get our single array?
    self.assertTrue(np.isclose(np.array(curs.fetchall()), test_data_1).all())

    # *** Now try it with two columns in one table and two in another:
    test_data_1 = np.array([[1, 11], [2, 12], [3, 13]])
    test_data_2 = np.array([[1, 110], [2, 120], [3, 130]])
    column1 = 'col1'
    column2 = 'col2'
    table1 = 'table1'
    table2 = 'table2'

    # create a fake dict
    arr_dict = {}
    arr_dict['test1'] = {'data': test_data_1, 'column': column1, 'table': table1}
    arr_dict['test2'] = {'data': test_data_1, 'column': column2, 'table': table1}
    arr_dict['test3'] = {'data': test_data_2, 'column': column1, 'table': table2}
    arr_dict['test4'] = {'data': test_data_2, 'column': column2, 'table': table2}
    analyze_main.store_data(arr_dict=arr_dict, db=db,
                            create_new_tables=create_new_tables, zones=3)

    exp_table1 = np.array([(1.0, 11.0, 11.0), (2.0, 12.0, 12.0), (3.0, 13.0, 13.0)])
    exp_table2 = np.array([(1.0, 110.0, 110.0), (2.0, 120.0, 120.0), (3.0, 130.0, 130.0)])
    curs.execute('SELECT * from {}'.format(table1))
    actual_table1 = curs.fetchall()
    curs.execute('SELECT * from {}'.format(table2))
    actual_table2 = curs.fetchall()
    self.assertTrue(np.isclose(exp_table1, actual_table1).all())
    self.assertTrue(np.isclose(exp_table2, actual_table2).all())
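
# --- Illustrative sketch (not part of the test suite) ------------------------
# The test above writes real rows into 'table1' and 'table2' in 'test_db', so
# back-to-back runs can see leftover columns. A tearDown along these lines
# (hypothetical; it assumes the same psycopg2 connection and login_info used
# above, and would live on the same TestCase class as test_database_updates)
# would keep runs independent by dropping the tables the test created:

def tearDown(self):
    """Drop the tables this test creates so repeated runs start clean."""
    conn = psycopg2.connect(database='test_db',
                            user=login_info['user'],
                            password=login_info['password'])
    curs = conn.cursor()
    for table in ('table1', 'table2'):
        curs.execute('DROP TABLE IF EXISTS {}'.format(table))
    conn.commit()
    conn.close()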