def main(scenarios=scenarios, DB=DB, ROOT_DIR=ROOT_DIR, ZONES=ZONES, map_file=map_file):
	"main entry point - loops over scenarios"

	msg='{} Starting total cost calculations using input file {}'
	logger.info(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))
	print(msg.format(datetime.now().strftime("%b %d %Y %H:%M:%S"), map_file))

	#This isn't the most efficient way to do it, but it's the most transparent:  we'll loop through each base:scenario pair.
	#  For each pair, we'll read the input file a line at a time and accumulate the cost deltas.

	base_scenario = scenarios[0]

	for s in scenarios[1:]:
		
		arr_dict={}
		#grab a reader for the input file
		reader=csv.DictReader(open(map_file))

		#process one line at a time from the setup file
		for line_ix, line in enumerate(reader, start=1):

			# the info comes in as a dict - this cleans up the content, removing comments, etc.  Return a dict.
			dmap = grabinfo(line)

			#these set processing parameters
			transpose=dmap['transpose']		#use transposed trip matrix?
			hov_adj=dmap['hov_adj']			#occupancy adjustment (hov2 implies double time costs, say)
			pct_hb=dmap['pct_hb']			#fraction of benefits accruing to the origin node ('home base')

			#these set storage parameters
			arr_name=dmap['rollup_to']				## updated for total costs
			column_name=arr_name
			table_name=s['name']+"_"+dmap['rollup_dbtable']		## updated for total costs
			
			#unless this line specifies a roll-up array/column, go on to the next line
			if not arr_name or not column_name:
				continue

			#get information for the base case
			base_dir=base_scenario['location']		#root directory location
			base_name=base_scenario['name']			#name for this scenario

			#Build fully specified path names from the locations in mappyings.py (subdirectory determined by the file name),
			#   then create np arrays out of them
			base_cost_file=get_full_filename(location=base_dir, filename=dmap['cost_file'])
			base_trips_file=get_full_filename(location=base_dir,  filename=dmap['trip_file'])

			#try to create np arrays from the raw data files; if they don't exist, go on to the next line
			try:
				base_trips_raw = npa_from_file( base_trips_file)
				base_cost_per_trip_raw = npa_from_file( base_cost_file)
			except Exception:
				exc_type, exc_value, exc_traceback = sys.exc_info()
				msg='Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
				logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))
				continue

			#Costs and trips for the base case - returns  base costs, base trips as  square np 
			#   arrays w/o OD headers, trips transposed if needed
			base_costs, base_trips=prep_data( base_cost_per_trip_raw , base_trips_raw,  transpose,  hov_adj )

			#Process the scenario costs and trips the same way
			test_dir = s['location']
			#grab the files and put them in np arrays
			test_cost_file=get_full_filename(location=test_dir, filename=dmap['cost_file'])
			test_trip_file=get_full_filename(location=test_dir,  filename=dmap['trip_file'])
			try:
				test_trips_raw = npa_from_file( test_trip_file)
				test_cost_per_trip_raw = npa_from_file( test_cost_file)
			except Exception:
				exc_type, exc_value, exc_traceback = sys.exc_info()
				msg='Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'
				logger.warning(msg.format(s['name'], exc_type, exc_value, line_ix, map_file))
				continue
			test_name=s['name']
			test_costs, test_trips=prep_data(cost_per_trip=test_cost_per_trip_raw, trips=test_trips_raw, transpose=transpose, hov_adj=hov_adj)
			#Sanity check: if the scenario's costs and trips match the base case exactly, the same input files were probably specified for both
			if np.equal(test_costs, base_costs).all() and np.isclose(test_trips, base_trips).all():
				if "FARE" not in test_cost_file and "FARE" not in base_cost_file:
					msg="\nWARN:  Same file: {} ::: {}"
					logger.critical(msg.format(base_trips_file, test_trip_file))
					logger.critical(msg.format(base_cost_file, test_cost_file))
					logger.critical('\n')

			#With all costs gathered, calculate the change in costs; produces a square np array
			##cs_delta = get_cs_delta(base_trips, test_trips, base_costs, test_costs)
			## updated for total costs
			total_cost_delta=get_total_cost_delta(base_trips, test_trips, base_costs, test_costs)
			logger.debug('total_cost_delta  {}'.format(total_cost_delta.sum()))

			#From the delta matrix, assign benefits to the origin and destination nodes; produces a vector of zones w/o OD headers
			#  (a sketch of this allocation appears after main()).
			#  For home-based transit trips, both outbound and return accrue to the home node, as do am and pm highway trips.
			#  For mid-day and night-time highway trips, the benefit is split between origin and destination nodes.
			## updated for total costs
			#benefits_by_zone = calculate_benefits(cs_delta, pct_hb)
			costs_by_zone = calculate_benefits(total_cost_delta, pct_hb)
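			#  (so with pct_hb=1.0 the full delta for an O-D pair accrues to the origin zone;
			#   with pct_hb=0.5 it is split evenly between origin and destination)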

			#the balance of this block stores the benefits and other information for posterity/more analysis

			#We'll aggregate the delta by benefit type, denominated in natural units (minutes, dollars, miles).  We can use scalars to transform
			#  minutes to dollars, miles to CO2, etc. as a post-processing step.

			#We'll create an aggregation array if requested in the 'aggregate to' column.  This bit of code adds the array to arr_dict if needed,
			#  then creates a null np array if needed.  It adds a column of the current benefits_by_zone to the array if the array is null; it
			#  increments the array by the benefits just calculated otherwise.

			#to hold the results, we'll make a dict:   arr_dict = {'aggregate_to': {'data': npa_of_data,
			#                                                                       'column': 'aggregate_to',
			#                                                                       'table': 'db_table'}}
			#... where the 'aggregate_to' value comes from the input spreadsheet and serves as the name of the database column.

			#create the dict entry if it doesn't yet exist
			if arr_name not in arr_dict:
				arr_dict[arr_name]={}
				arr_dict[arr_name]['data']=None

			#update the dict with the current state of the roll-up array
			arr_dict[arr_name]={'data': sum_col_to_np_array(npa=arr_dict[arr_name]['data'],
			                                                vector=costs_by_zone,
			                                                max_index_val=len(base_trips)),
			                    'column': column_name,
			                    'table': table_name
			                    }
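			#  (e.g. if several input lines roll up to the same 'rollup_to' name, their costs_by_zone
			#   vectors are summed into a single column of that table)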
			logger.debug('{} -versus- {} line {}\n\t {}  \n\t {} \n\t base trips: {}  test trips: {}  sum delta costs: {}  (summary stats - not used in benefit calcs)'.format(
			    base_name, test_name,
			    line_ix,
			    dmap['trip_file'],
			    dmap['cost_file'].split()[0],
			    np.sum(base_trips), np.sum(test_trips),
			    np.sum(total_cost_delta)))

		#store the arrays in db tables	
		store_data(arr_dict=arr_dict, db=DB, zones=ZONES)

	finish = datetime.now()
	msg='Finished at {}.  Processed {} lines in {}.'
	elapsed=str(finish-start).split('.')[0]
	print(msg.format(finish.strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
	logger.info(msg.format(finish.strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed))
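
#The zone-allocation step above uses calculate_benefits(), which is defined elsewhere in this project.
#  The sketch below is only an illustration of the allocation described in the comments: pct_hb of each
#  cell of the square OD delta matrix is credited to the origin (row) zone and the remainder to the
#  destination (column) zone.  The name, signature, and internals here are assumptions, not the
#  project's actual implementation.
def calculate_benefits_sketch(delta, pct_hb):
	"""Collapse a square OD delta matrix into a per-zone vector.

	delta  -- square np array of O-D cost/benefit deltas (no OD headers)
	pct_hb -- fraction of each cell credited to the origin ('home base') zone;
	          the remaining (1 - pct_hb) is credited to the destination zone.
	"""
	to_origin = pct_hb * delta.sum(axis=1)          #row sums accrue to origin zones
	to_dest = (1.0 - pct_hb) * delta.sum(axis=0)    #column sums accrue to destination zones
	return to_origin + to_dest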
Example #2
	def test_database_updates(self):
		"""makes sure that the database storage routine does all of the following:
		       - creates a new database if none exists
		       - adds new tables as needed
		       - adds new columns as needed and populates them with the right data.
		"""
		db='test_db'
		
		#test1:  will it store/retrieve a single array?
		
		create_new_tables=True
		
		test_data_1=np.array(( [[1,11], [2, 12], [3,13]] ))
		column='col1'
		table='table1'
		
		#create a fake dict with a bit of data	
		arr_dict={}
		arr_dict['test1']={ 'data': test_data_1,
				            'column': column,
				            'table':table
				            }		
		analyze_main.store_data(arr_dict=arr_dict, db = db, create_new_tables=create_new_tables, zones=3)
		
		#make a db connection (let main app create the db if needed)
		conn = psycopg2.connect(database = db,
				                user=login_info['user'],
				                password=login_info['password']
				                )
		
		curs = conn.cursor() 
		
		curs.execute('SELECT * from {}'.format(table))
		conn.commit()
		
		#do we get our single array?
		self.assertTrue( np.isclose(np.array(curs.fetchall()), test_data_1).all())
		
		
		#***Now try it with two columns in one table and two in another:
		test_data_1=np.array(( [[1,11], [2, 12], [3,13]] ))
		test_data_2=np.array(( [[1,110], [2, 120], [3,130]] ))
		
		
		column1='col1'
		column2='col2'
		table1='table1'
		table2='table2'
		
		#create a fake dict 	
		arr_dict={}
		arr_dict['test1']={ 'data': test_data_1,
				            'column': column1,
				            'table':table1
				            }	
		arr_dict['test2']={ 'data': test_data_1,
				            'column': column2,
				            'table':table1
				            }	
		arr_dict['test3']={ 'data': test_data_2,
				            'column': column1,
				            'table':table2
				            }	
		arr_dict['test4']={ 'data': test_data_2,
				            'column': column2,
				            'table':table2
				            }	
		analyze_main.store_data(arr_dict=arr_dict, db = db, create_new_tables=create_new_tables, zones=3)
		
		exp_table1=np.array([(1.0, 11.0, 11.0), (2.0, 12.0, 12.0), (3.0, 13.0, 13.0)])
		exp_table2=np.array([(1.0, 110.0, 110.0), (2.0, 120.0, 120.0), (3.0, 130.0, 130.0)])
		
		curs.execute('SELECT * from {}'.format(table1))
		actual_table1=curs.fetchall()
		
		curs.execute('SELECT * from {}'.format(table2))
		actual_table2=curs.fetchall()
		
		self.assertTrue( np.isclose(exp_table1, actual_table1).all())
		self.assertTrue ( np.isclose(exp_table2, actual_table2).all())
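
#store_data() itself lives in analyze_main and is what the test above exercises.  The sketch below only
#  illustrates the contract the test checks: each arr_dict entry becomes one column in its target table,
#  keyed by zone, with tables and columns created on demand.  The SQL and the function name/signature
#  here are assumptions (PostgreSQL 9.6+ syntax), not the project's actual code.
import numpy as np
import psycopg2

def store_data_sketch(arr_dict, db, login_info):
	conn = psycopg2.connect(database=db,
	                        user=login_info['user'],
	                        password=login_info['password'])
	curs = conn.cursor()
	for entry in arr_dict.values():
		table, column, data = entry['table'], entry['column'], entry['data']
		#create the table (zone key only) and the requested column if they don't already exist
		curs.execute('CREATE TABLE IF NOT EXISTS {} (zone integer PRIMARY KEY)'.format(table))
		curs.execute('ALTER TABLE {} ADD COLUMN IF NOT EXISTS {} double precision'.format(table, column))
		#each row of 'data' is (zone, value); upsert the value into that zone's row
		for zone, value in np.asarray(data):
			curs.execute(
			    'INSERT INTO {0} (zone, {1}) VALUES (%s, %s) '
			    'ON CONFLICT (zone) DO UPDATE SET {1} = EXCLUDED.{1}'.format(table, column),
			    (int(zone), float(value)))
	conn.commit()
	conn.close()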