def main(scenarios=scenarios, DB=DB, ROOT_DIR=ROOT_DIR, ZONES=ZONES, map_file=map_file):
	"""Main entry point: roll up total-cost deltas for each scenario versus the base.

	The first element of ``scenarios`` is the base case; every later element is
	compared against it.  For each comparison the mapping file ``map_file`` is
	read one row at a time.  Each usable row names a trip file and a cost file,
	which are loaded for both the base and the test scenario, combined into a
	total-cost delta, attributed to zones by ``calculate_benefits`` and summed
	into the array named by the row's 'rollup_to' value.  Once every row has been
	read, the accumulated arrays for that scenario are passed to ``store_data``.

	This isn't the most efficient way to do it (the mapping file and the base
	files are re-read for every scenario), but it is the most transparent.

	Args:
		scenarios: sequence of dicts providing 'name' and 'location'; element 0
			is the base case.
		DB: handed to ``store_data`` as ``db``.
		ROOT_DIR: accepted for interface compatibility; not used in this function.
		ZONES: handed to ``store_data`` as ``zones``.
		map_file: path of the CSV mapping file, read with ``csv.DictReader``.

	Rows whose data files cannot be loaded are logged as warnings and skipped.
	"""

	# Timed from entry so the closing summary does not rely on a timestamp
	# defined somewhere outside this function.
	start = datetime.now()

	msg = '{} Starting total cost calculations using input file {}'
	banner = msg.format(start.strftime("%b %d %Y %H:%M:%S"), map_file)
	logger.info(banner)
	print(banner)

	load_fail_msg = 'Scenario {}: could not open requisite files \n {} {} specified in line {} of {}'

	# Stays 0 when there is nothing to compare or the mapping file has no rows,
	# so the closing summary never hits an unbound name.
	line_ix = 0

	# Base-case details do not change from row to row.
	base_scenario = scenarios[0]
	base_dir = base_scenario['location']		#root directory location
	base_name = base_scenario['name']			#name for this scenario

	for s in scenarios[1:]:

		test_name = s['name']
		test_dir = s['location']

		# Holds the roll-up results for this scenario:
		#   arr_dict{'rollup_to': {'data': npa_of_data, 'column': 'rollup_to', 'table': db_table}}
		# where the 'rollup_to' value comes from the mapping file and doubles as
		# the database column name.
		arr_dict = {}

		# The context manager closes the mapping file even if a row raises.
		with open(map_file) as map_handle:
			reader = csv.DictReader(map_handle)

			#process one line at a time from the setup file
			for line_ix, line in enumerate(reader, start=1):

				# the row comes in as a dict; grabinfo() returns a cleaned-up dict
				dmap = grabinfo(line)

				#these set processing parameters
				transpose = dmap['transpose']		#use transposed trip matrix?
				hov_adj = dmap['hov_adj']			#occupancy adjustment (hov2 implies double time costs, say)
				pct_hb = dmap['pct_hb']				#fraction of benefits accruing to origin node ('home base')

				#these set storage parameters
				arr_name = dmap['rollup_to']
				db_table = dmap['rollup_dbtable']

				# Unless this row names both a roll-up column and a data table, go
				# to the next row.  (Checked before the table name is built so an
				# empty table entry is skipped rather than concatenated.)
				if not arr_name or not db_table:
					continue

				column_name = arr_name
				table_name = test_name + "_" + db_table

				# Fully specified paths for the base case; the subdirectory is
				# chosen by get_full_filename() from the file name.
				base_cost_file = get_full_filename(location=base_dir, filename=dmap['cost_file'])
				base_trips_file = get_full_filename(location=base_dir, filename=dmap['trip_file'])

				# Try to build np arrays from the raw data files; if that fails,
				# warn and move on to the next row.
				try:
					base_trips_raw = npa_from_file(base_trips_file)
					base_cost_per_trip_raw = npa_from_file(base_cost_file)
				except Exception as exc:
					logger.warning(load_fail_msg.format(test_name, type(exc), exc, line_ix, map_file))
					continue

				# Base costs and base trips as square np arrays w/o OD headers,
				# trips transposed if requested.
				base_costs, base_trips = prep_data(base_cost_per_trip_raw, base_trips_raw, transpose, hov_adj)

				#Process the scenario costs and trips the same way
				test_cost_file = get_full_filename(location=test_dir, filename=dmap['cost_file'])
				test_trip_file = get_full_filename(location=test_dir, filename=dmap['trip_file'])
				try:
					test_trips_raw = npa_from_file(test_trip_file)
					test_cost_per_trip_raw = npa_from_file(test_cost_file)
				except Exception as exc:
					logger.warning(load_fail_msg.format(test_name, type(exc), exc, line_ix, map_file))
					continue

				test_costs, test_trips = prep_data(cost_per_trip=test_cost_per_trip_raw, trips=test_trips_raw, transpose=transpose, hov_adj=hov_adj)

				# Identical base and test inputs usually mean the scenario points
				# at the wrong files; fare files are exempt from this warning.
				if np.equal(test_costs, base_costs).all() and np.isclose(test_trips, base_trips).all():
					if "FARE" not in test_cost_file and "FARE" not in base_cost_file:
						msg = "\nWARN:  Same file: {} ::: {}"
						logger.critical(msg.format(base_trips_file, test_trip_file))
						logger.critical(msg.format(base_cost_file, test_cost_file))
						logger.critical('\n')

				#With all inputs gathered, calculate the change in total costs; produces a square np array
				total_cost_delta = get_total_cost_delta(base_trips, test_trips, base_costs, test_costs)
				logger.debug('total_cost_delta  {}'.format(total_cost_delta.sum()))

				# From the delta matrix, assign costs to the origin and destination
				# node; produces a vector of nodes w/o OD headers.
				#  For home-based transit trips, both outbound and return accrue to home node, as do am and pm highway trips.
				#  For mid-day and night-time highway trips, the amount is split between origin and dest nodes.
				costs_by_zone = calculate_benefits(total_cost_delta, pct_hb)

				# The balance of this block stores the results for later analysis.
				# Values are aggregated in natural units (minutes, dollars, miles);
				# scalars can transform minutes to dollars, miles to CO2, etc. as a
				# post-processing step.

				#create the roll-up entry if it doesn't exist yet
				if arr_name not in arr_dict:
					arr_dict[arr_name] = {'data': None}

				# sum_col_to_np_array() receives the current roll-up array (None the
				# first time) plus this row's vector and returns the updated array.
				arr_dict[arr_name] = {
					'data': sum_col_to_np_array(npa=arr_dict[arr_name]['data'],
					                            vector=costs_by_zone,
					                            max_index_val=len(base_trips)),
					'column': column_name,
					'table': table_name,
				}
				logger.debug('{} -versus- {} line {}\n\t {}  \n\t {} \n\t base trips: {}  test trips: {}  sum dlta costs: {}  (summary stats - not used in benefit calcs)'.format(
				    base_name, test_name,
				    line_ix,
				    dmap['trip_file'],
				    dmap['cost_file'].split()[0],
				    np.sum(base_trips), np.sum(test_trips),
				    np.sum(total_cost_delta)))

		#store the arrays in db tables
		store_data(arr_dict=arr_dict, db=DB, zones=ZONES)

	finish = datetime.now()
	msg = 'Finished at {}.  Processed {} files in {}.'
	elapsed = str(finish - start).split('.')[0]		#drop the fractional seconds
	summary = msg.format(finish.strftime("%b %d %Y %H:%M:%S"), line_ix, elapsed)
	print(summary)
	logger.info(summary)
Exemplo n.º 2
0
	def test_cons_surplus_calcs(self):
		"""Check the consumer-surplus delta and its attribution to zones.

		Builds a small base case and a test case (triple the trips at half the
		cost per trip), then verifies three things:
		  1. the cell-wise consumer surplus delta from get_cs_delta();
		  2. the per-zone roll-up when all benefits go to the origin zone;
		  3. the per-zone roll-up when benefits are split 50/50 between origin
		     and destination, including that no benefit is lost in the split.
		"""

		# Spin up a couple of test arrays.  The first row and first column look
		# like OD headers: prep_data() output is compared against 3x3
		# expectations and the cost arrays are sliced [1:, 1:] below.
		base_trips = np.array([
			[0,  1,  2,  3],
			[1, 11, 12, 13],
			[2, 21, 22, 23],
			[3, 31, 32, 33],
		])
		base_cost_per_trip = np.array([
			[0,   1,   2,   3],
			[1, .10, .20, .30],
			[2, .40, .50, .60],
			[3, .70, .80, .90],
		])

		#create a test case with triple trips, each at half price
		test_trips = base_trips * 3
		test_cost_per_trip = base_cost_per_trip / 2

		#We expect the benefits to be a function of cost and quantity, calculated cell-wise
		#              .5 * (base_trips + test_trips)*(base_costs - test_costs)
		exp_cs_delta = np.array([
			[ 1.1,  2.4,  3.9],    #[.5*(11+11*3)*(.10-.10/2),  .5*(12+12*3)*(.20-.20/2),  .5*(13+13*3)*(.30-.30/2)]
			[ 8.4, 11.0, 13.8],    #[.5*(21+21*3)*(.40-.40/2),  .5*(22+22*3)*(.50-.50/2),  .5*(23+23*3)*(.60-.60/2)]
			[21.7, 25.6, 29.7],    #[.5*(31+31*3)*(.70-.70/2),  .5*(32+32*3)*(.80-.80/2),  .5*(33+33*3)*(.90-.90/2)]
		])

		# Run prep_data() on both the base and the test case.  Only the second
		# element of each returned pair (the trips actually used) is needed here.
		base_costs, base_trips_used = analyze_main.prep_data(base_cost_per_trip, base_trips)
		test_costs, test_trips_used = analyze_main.prep_data(test_cost_per_trip, test_trips)

		#calculate the consumer surplus delta with get_cs_delta(), cell-wise
		cs_delta = analyze_main.get_cs_delta(
			base_trips_used,
			test_trips_used,
			base_cost_per_trip[1:, 1:],
			test_cost_per_trip[1:, 1:],
		)

		#do we have the expected values?
		self.assertTrue(np.allclose(cs_delta, exp_cs_delta))

		# So far so good.  Now roll these benefits up by Zone - that's the unit of
		# analysis used to assign benefits by race.  For each origin Zone, total
		# benefits are the sum of the benefits of trips to every destination,
		# i.e. the row sum.
		exp_benefits_by_zone = np.array([[
			 7.4,    #  1.1 +  2.4 +  3.9
			33.2,    #  8.4 + 11.0 + 13.8
			77.0,    # 21.7 + 25.6 + 29.7
		]])

		pct_hb = 100   #percent of benefits accruing to originating (home) Zone
		benefits_by_zone = analyze_main.calculate_benefits(cs_delta, pct_hb)
		self.assertTrue(np.allclose(benefits_by_zone, exp_benefits_by_zone))

		# For some trips (highway trips at night or mid-day), only half of the
		# benefits accrue to the originating Zone; the other half accrue to the
		# destination Zone.  To operationalize this, the origin zone gets half the
		# row sum (each row is an origin) and the destination zone gets half the
		# column sum.  Since any zone can be both an origin and a destination, the
		# 'Zone as origin' and 'Zone as destination' values are added up for
		# attribution to each zone.
		#
		# Or should the 'destination' part be counted at all?  *Someone* gets the
		# benefit - if Chinese food is delivered to your Zone, someone there
		# benefits from a reduced-cost trip (cheaper food) or less time (hotter
		# food).  On the other hand, the benefit might accrue to the origin (lower
		# labor cost for the delivery guy, more repeat business from timely
		# delivery).  Hmmmm.

		#          credit for being origin (1/2 row sum)  +  credit for being dest (1/2 col sum)
		exp_benefits_by_zone = np.array([[
			19.3,    # ( 1.1 +  2.4 +  3.9)/2  +  (1.1 +  8.4 + 21.7)/2
			36.1,    # ( 8.4 + 11.0 + 13.8)/2  +  (2.4 + 11.0 + 25.6)/2
			62.2,    # (21.7 + 25.6 + 29.7)/2  +  (3.9 + 13.8 + 29.7)/2
		]])

		pct_hb = 50   #percent of benefits accruing to originating (home) Zone
		benefits_by_zone = analyze_main.calculate_benefits(cs_delta, pct_hb)
		self.assertTrue(np.allclose(benefits_by_zone, exp_benefits_by_zone))

		# Make sure that the total benefits are accounted for.  The two sums are
		# built from floats in different orders, so compare with a tolerance
		# instead of exact equality.
		self.assertTrue(np.isclose(np.sum(benefits_by_zone), np.sum(cs_delta)))