def make_zone_lookup_table():
    """Build and persist the ZoneLookup table from the Assessment data.

    Returns the resulting dataframe (street number/name plus zoning code),
    after saving it to the database table 'ZoneLookup'.
    """
    # Work on a copy of the full Assessment table
    lookup = df_assessment.copy()

    # Discard rows without a zoning code, then duplicate addresses
    lookup = lookup.dropna(subset=[util.ZONING_CODE_1])
    lookup = lookup.drop_duplicates(
        subset=[ADDR, util.LADDR_ALT_STREET_NUMBER])

    # Derive street number and name from the normalized address
    lookup[STREET_NUMBER] = lookup.apply(
        lambda r: get_street_number(r, ADDR), axis=1)
    lookup[STREET_NAME] = lookup.apply(
        lambda r: get_street_name(r, ADDR), axis=1)

    # Keep only the columns destined for the lookup table
    keep_columns = [
        STREET_NUMBER,
        util.LADDR_ALT_STREET_NUMBER,
        STREET_NAME,
        util.ZONING_CODE_1,
    ]
    lookup = lookup[keep_columns]

    # Order rows by street name, then street number
    sort_keys = [STREET_NAME, STREET_NUMBER, util.LADDR_ALT_STREET_NUMBER]
    lookup = lookup.sort_values(by=sort_keys)

    # Persist to the database and hand the table back to the caller
    util.create_table('ZoneLookup', conn, cur, df=lookup)
    return lookup
def save_model(target_table_name, xl_model, conn, cur, engine):
    """Append the sheets of one Excel model to its target database table.

    Args:
        target_table_name: name of the database table to extend/recreate.
        xl_model: dict with key 'sheet_names' listing the sheets to save.
        conn, cur: open database connection and cursor.
        engine: SQLAlchemy engine used to read the existing table.

    Fix: the original accumulated rows with DataFrame.append inside a loop,
    which is quadratic in the number of sheets and was removed in pandas 2.0;
    all frames are now concatenated with a single pd.concat call.
    """
    # Get list of sheets to be saved in target table
    sheet_names = xl_model['sheet_names']
    print('\nSheets {0}'.format(sheet_names))
    print(' Saving to table {0}'.format(target_table_name))

    # Load dataframe with contents of target table
    df = pd.read_sql_table(target_table_name, engine, index_col=util.ID,
                           parse_dates=util.get_date_columns(
                               dc_elections[sheet_names[0]]['df']))

    # Align each sheet to the table's column order, then combine everything
    frames = [df]
    for sheet_name in sheet_names:
        dc_elections[sheet_name]['df'] = dc_elections[sheet_name][
            'df'].reindex(columns=df.columns)
        frames.append(dc_elections[sheet_name]['df'])
    df = pd.concat(frames)

    # Drop duplicate rows
    df = df.drop_duplicates()

    # Recreate the table in the database
    util.create_table(target_table_name, conn, cur, df=df)
def create_batch_results_file(m, scenario=None):
    """Create the shared batch-results summary file if it is absent.

    An existing file is deliberately left in place so multiple
    threads/scenarios can keep writing rows into it.
    """
    summary_path = os.path.join(output_dir, "summary_all_scenarios.tsv")
    if os.path.isfile(summary_path):
        return  # keep the existing file for concurrent writers
    util.create_table(
        output_file=summary_path,
        headings=summary_headers(m, scenario),
    )
def create_positions_table():
    """Create the Positions lookup table (unique position names)."""
    ddl = """
        CREATE TABLE Positions (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            position TEXT NOT NULL,
            unique (position)
        );
    """
    create_table(DB_FILE, ddl)
def create_foot_table():
    """Create the Foot lookup table (unique preferred-foot values)."""
    ddl = """
        CREATE TABLE Foot (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            foot TEXT NOT NULL,
            unique (foot)
        );
    """
    create_table(DB_FILE, ddl)
def create_countries_table():
    """Create the Countries lookup table (unique country names)."""
    ddl = """
        CREATE TABLE Countries (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            country TEXT NOT NULL,
            unique (country)
        );
    """
    create_table(DB_FILE, ddl)
def create_cities_table():
    """Create the Cities table, each city linked to a country."""
    ddl = """
        CREATE TABLE Cities (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            city TEXT NOT NULL,
            country_id INTEGER NOT NULL,
            FOREIGN KEY (country_id) REFERENCES Countries(id),
            unique (city)
        );
    """
    create_table(DB_FILE, ddl)
def write_batch_results(m):
    """Append this run's summary row to the shared batch-results file.

    The file is retained across scenarios so it can summarize them all;
    it therefore has to be cleared by hand before launching a new batch
    (e.g., when running get_scenario_data or clearing the scenario_queue
    directory).
    """
    results_path = os.path.join(m.options.outputs_dir,
                                "demand_response_summary.tsv")
    # Create the file with its header row only on first use
    if not os.path.isfile(results_path):
        util.create_table(output_file=results_path,
                          headings=summary_headers(m))
    util.append_table(m,
                      output_file=results_path,
                      values=lambda m: summary_values(m))
def create_stadiums_table():
    """Create the Stadiums table, linked to Cities and Countries."""
    ddl = """
        CREATE TABLE Stadiums (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            city_id INTEGER,
            country_id INTEGER,
            capacity INTEGER,
            FOREIGN KEY (city_id) REFERENCES Cities(id),
            FOREIGN KEY (country_id) REFERENCES Countries(id)
        );
    """
    create_table(DB_FILE, ddl)
def create_transfers_table():
    """Create the Transfers table (player moves between clubs).

    Fix: the original DDL declared
    FOREIGN KEY (player_id) REFERENCES Players(id) without declaring a
    player_id column, which makes SQLite reject the CREATE TABLE with
    "unknown column in foreign key definition". The column is added here.
    """
    db_file = DB_FILE
    sql_create_transfers_table = """
        CREATE TABLE Transfers (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            player_id INTEGER,
            season TEXT,
            out_club TEXT NOT NULL,
            in_club TEXT NOT NULL,
            date TEXT,
            FOREIGN KEY (player_id) REFERENCES Players(id),
            FOREIGN KEY (out_club) REFERENCES Clubs(id),
            FOREIGN KEY (in_club) REFERENCES Clubs(id)
        );
    """
    # NOTE(review): out_club/in_club are TEXT yet reference Clubs(id)
    # (INTEGER). SQLite tolerates the mismatch, but it looks unintended
    # -- confirm whether these should be INTEGER club ids.
    create_table(db_file, sql_create_transfers_table)
def create_batch_results_file(m, tag=None):
    """Create the batch summary file for this tag, unless it exists.

    An existing file is kept so multiple threads can keep appending.
    """
    # Suffix appended to the file name when a tag is supplied
    suffix = "" if tag is None else "_" + str(tag)

    output_file = os.path.join(output_dir, "summary{t}.txt".format(t=suffix))
    if os.path.isfile(output_file):
        return  # keep the existing file for concurrent writers

    headings = ("max_demand_response_share", "total_cost", "cost_per_kwh")
    headings += tuple('cost_per_kwh_' + str(p) for p in m.PERIODS)
    util.create_table(output_file=output_file, headings=headings)
def create_clubs_table():
    """Create the Clubs table, linked to Cities, Countries and Stadiums."""
    # NOTE(review): 'dob' on a club presumably means founding date --
    # confirm against the data loader.
    ddl = """
        CREATE TABLE Clubs (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            full_name TEXT,
            dob TEXT,
            city_id INTEGER,
            country_id INTEGER,
            stadium_id INTEGER,
            FOREIGN KEY (city_id) REFERENCES Cities(id),
            FOREIGN KEY (country_id) REFERENCES Countries(id),
            FOREIGN KEY (stadium_id) REFERENCES Stadiums(id)
        );
    """
    create_table(DB_FILE, ddl)
def save_elections(dc_elections, dc_models): print('\n\n-- Loading database --') # Open the database conn, cur, engine = util.open_database(args.output_filename, args.create) # Get list of election models from database dc_db_models = get_db_models(conn, cur) # Iterate over Excel models to determine how sheets following that model should be saved for model_name in dc_models: # Get next Excel model xl_model = set(dc_models[model_name]['model']) sheet_names = dc_models[model_name]['sheet_names'] # Search for a matching model among database models target_table_name = None for table_name in dc_db_models: # Get next database model db_model = dc_db_models[table_name] # Calculate differences between Excel and database models db_minus_xl = db_model - xl_model xl_minus_db = xl_model - db_model # If they match, set the target table name and quit loop if (len(db_minus_xl) == 0) and (len(xl_minus_db) == 0): target_table_name = table_name break # If table with matching model not found in database, create an empty one if not target_table_name: target_table_name = TABLE_NAME_PREFIX + '{:02d}'.format( len(dc_db_models) + 1) util.create_table(target_table_name, conn, cur, columns=dc_models[model_name]['model']) dc_db_models = get_db_models(conn, cur) save_model(target_table_name, dc_models[model_name], conn, cur, engine)
def create_players_table():
    """Create the Players table, linked to Cities, Foot and Positions."""
    # NOTE(review): city_id/foot_id/position_id are declared TEXT yet
    # reference INTEGER id columns; SQLite tolerates this, but it may be
    # unintentional -- confirm.
    ddl = """
        CREATE TABLE Players (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULL,
            name_ob TEXT,
            city_id TEXT,
            citizenship TEXT,
            date_ob TEXT,
            foot_id TEXT,
            height INTEGER,
            position_id TEXT,
            link TEXT,
            FOREIGN KEY (city_id) REFERENCES Cities(id),
            FOREIGN KEY (foot_id) REFERENCES Foot(id),
            FOREIGN KEY (position_id) REFERENCES Positions(id)
        );
    """
    create_table(DB_FILE, ddl)
def post_iterate(m):
    """Report results after each solve iteration (Python 2 module).

    Writes: a cost summary to stdout, the most recent demand bid and the
    current bid weights to per-scenario .tsv files, dual costs, and the
    batch results row.
    """
    print "\n\n======================================================="
    print "Solved model"
    print "======================================================="
    print "Total cost: ${v:,.0f}".format(v=value(m.SystemCost))

    # TODO:
    # maybe calculate prices for the next round here and attach them to the
    # model, so they can be reported as final prices (currently we don't
    # report the final prices, only the prices prior to the final model run)

    SystemCost = value(m.SystemCost)  # calculate once to save time
    print "prev_SystemCost={}, SystemCost={}, ratio={}".format(
        m.prev_SystemCost, SystemCost,
        None if m.prev_SystemCost is None
        else SystemCost / m.prev_SystemCost)

    tag = m.options.scenario_name
    outputs_dir = m.options.outputs_dir

    # report information on most recent bid
    # (the table with its header is only created on the first iteration)
    if m.iteration_number == 0:
        util.create_table(
            output_file=os.path.join(outputs_dir, "bid_{t}.tsv".format(t=tag)),
            headings=("bid_num", "load_zone", "timeseries", "timepoint",
                      "marginal_cost", "price", "bid_load", "wtp",
                      "base_price", "base_load"))
    b = m.DR_BID_LIST.last()  # current bid
    util.append_table(m, m.LOAD_ZONES, m.TIMEPOINTS,
                      output_file=os.path.join(
                          outputs_dir, "bid_{t}.tsv".format(t=tag)),
                      values=lambda m, z, tp: (
                          b, z, m.tp_ts[tp], m.tp_timestamp[tp],
                          m.prev_marginal_cost[z, tp],
                          m.dr_price[b, z, tp],
                          m.dr_bid[b, z, tp],
                          m.dr_bid_benefit[b, z, m.tp_ts[tp]],
                          m.base_data_dict[z, tp][1],
                          m.base_data_dict[z, tp][0],
                      ))

    # store the current bid weights for future reference
    if m.iteration_number == 0:
        util.create_table(output_file=os.path.join(
            outputs_dir, "bid_weights_{t}.tsv".format(t=tag)),
            headings=("iteration", "load_zone", "timeseries", "bid_num",
                      "weight"))
    util.append_table(m, m.LOAD_ZONES, m.TIMESERIES, m.DR_BID_LIST,
                      output_file=os.path.join(
                          outputs_dir, "bid_weights_{t}.tsv".format(t=tag)),
                      values=lambda m, z, ts, b: (len(m.DR_BID_LIST), z, ts, b,
                                                  m.DRBidWeight[b, z, ts]))

    # report the dual costs
    write_dual_costs(m)

    # if m.iteration_number % 5 == 0:
    #     # save time by only writing results every 5 iterations
    #     write_results(m)
    write_results(m)
    write_batch_results(m)
# Open the master database conn, cur, engine = util.open_database( args.master_filename, False ) # Read election tables from database election_columns = [util.ELECTION_DATE, util.ELECTION_TYPE] df_e1 = pd.read_sql_table( 'ElectionModel_01', engine, columns=election_columns ) df_e2 = pd.read_sql_table( 'ElectionModel_02', engine, columns=election_columns ) df_e3 = pd.read_sql_table( 'ElectionModel_03', engine, columns=election_columns ) # Combine election data into one dataframe listing all elections, by date and type df_elections = df_e1.append( df_e2 ).append( df_e3 ) df_history = df_elections.drop_duplicates( subset=[ util.ELECTION_DATE, util.ELECTION_TYPE ] ) df_history = df_history.sort_values( by=[util.ELECTION_DATE] ).reset_index( drop=True ) # Isolate election year df_history[util.ELECTION_YEAR] = df_history[util.ELECTION_DATE].str.split( '-', expand=True )[0].astype( int ) # Count per-election turnout df_history[util.TURNOUT] = df_history.apply( lambda row: len( df_elections[ ( df_elections[util.ELECTION_DATE] == row[util.ELECTION_DATE] ) & ( df_elections[util.ELECTION_TYPE] == row[util.ELECTION_TYPE] ) ] ), axis=1 ) # Generate factors for calculating voter engagement scores df_history[util.RECENCY_FACTOR] = df_history[util.ELECTION_YEAR] - df_history[util.ELECTION_YEAR].min() + 1 df_history[util.TURNOUT_FACTOR] = round( 100 * df_history[util.TURNOUT].min() / df_history[util.TURNOUT] ).astype( int ) df_history[util.SCORE] = df_history[util.TURNOUT_FACTOR] * df_history[util.RECENCY_FACTOR] # Save result to database util.create_table( 'ElectionHistory', conn, cur, df=df_history ) # Report elapsed time util.report_elapsed_time()
if col.startswith(util.VOTED + '_local_'): recent_local_columns.append(col) recent_local_columns = recent_local_columns[( -1 * util.RECENT_LOCAL_ELECTION_COUNT):] df[util.RECENT_LOCAL_ELECTIONS_VOTED] = df.apply( lambda row: count_recent_local_votes(row), axis=1) # Find columns counting attendance at recent town meetings recent_meeting_columns = [] for col in df.columns: if col.startswith(util.TOWN_MEETINGS_ATTENDED + '_'): recent_meeting_columns.append(col) table_name = 'Partisans_' + args.party df = df.reindex(columns=(util.COLUMN_ORDER[table_name] + recent_local_columns + recent_meeting_columns)) # Sort on partisan score df = df.sort_values(by=[ util.PARTISAN_SCORE, util.VOTER_ENGAGEMENT_SCORE, util.RECENT_LOCAL_ELECTIONS_VOTED ], ascending=False) # Save result to database util.create_table(table_name, conn, cur, df=df) # Report elapsed time util.report_elapsed_time()
# Clean the input df = clean_table( df ) # If we got a clean table, append it to the result if df is not None: if df_result is None: df_result = df else: df_result = df_result.append( df ) else: exit( '\nError: Could not clean table for "{0}"\n'.format( input_path ) ) # Sort result table on date df_result = df_result.sort_values( by=[util.DATE_ISSUED] ) # Open output file conn, cur, engine = util.open_database( args.output_filename, args.create ) # Save result to database util.create_table( args.output_table_name, conn, cur, df=df_result ) # Optionally save audit to Excel file if args.audit_filename is not None: print( '\nSaving audit to {}'.format( args.audit_filename ) ) df_audit.to_excel( args.audit_filename, index=False ) # Report elapsed time util.report_elapsed_time()
# Drop rows of totals df_raw = df_raw[df_raw[util.OFFICE_OR_CANDIDATE] != 'Totals'] # Create copy df = df_raw.copy() # Create empty columns df[util.OFFICE] = '' df[util.CANDIDATE] = '' df[util.TOTAL] = '' office = '' for index, row in df_raw.iterrows(): if pd.isnull(row[util.PRECINCT_1]): office = row[util.OFFICE_OR_CANDIDATE] else: df.at[index, util.OFFICE] = office df.at[index, util.CANDIDATE] = row[util.OFFICE_OR_CANDIDATE] df.at[index, util.TOTAL] = row[util.PRECINCT_1:util.PRECINCT_9].sum() df = df.drop(columns=[util.OFFICE_OR_CANDIDATE]) df = df.dropna(subset=[util.PRECINCT_1]) # Save result to database util.create_table('LocalElectionResults', conn, cur, df=df) # Report elapsed time util.report_elapsed_time()
axis=1) df_partitions[util.MEAN_VOTER_ENGAGEMENT_SCORE] = df_partitions.apply( lambda row: calculate_mean_engagement_score(row, args.partition_column ), axis=1) df_partitions[ util.MEAN_LIKELY_DEM_VOTER_ENGAGEMENT_SCORE] = df_partitions.apply( lambda row: calculate_mean_engagement_score( row, args.partition_column, util.D), axis=1) df_partitions[ util.MEAN_LIKELY_REPUB_VOTER_ENGAGEMENT_SCORE] = df_partitions.apply( lambda row: calculate_mean_engagement_score( row, args.partition_column, util.R), axis=1) df_partitions[util.MEAN_TOTAL_ASSESSED_VALUE] = df_partitions.apply( lambda row: calculate_mean_assessed_value(row, args.partition_column), axis=1).astype(int) # Sort on partition df_partitions[args.partition_column] = df_partitions[ args.partition_column].astype(str) df_partitions = df_partitions.sort_values(by=[args.partition_column]) df_partitions = df_partitions.reset_index(drop=True) # Save result to database util.create_table(args.table_name, conn, cur, df=df_partitions) # Report elapsed time util.report_elapsed_time()
def post_iterate(m):
    """Report results after each solve iteration (Python 2 module).

    Prints a cost summary, writes the latest demand bid and the current
    bid weights to per-scenario .tsv files, then writes dual costs,
    per-iteration results and the batch summary row.
    """
    print "\n\n======================================================="
    print "Solved model"
    print "======================================================="
    print "Total cost: ${v:,.0f}".format(v=value(m.SystemCost))

    # TODO:
    # maybe calculate prices for the next round here and attach them to the
    # model, so they can be reported as final prices (currently we don't
    # report the final prices, only the prices prior to the final model run)

    SystemCost = value(m.SystemCost)  # calculate once to save time
    print "prev_SystemCost={}, SystemCost={}, ratio={}".format(
        m.prev_SystemCost, SystemCost,
        None if m.prev_SystemCost is None
        else SystemCost/m.prev_SystemCost
    )

    tag = m.options.scenario_name
    outputs_dir = m.options.outputs_dir

    # report information on most recent bid
    # (header row is only written on the first iteration)
    if m.iteration_number == 0:
        util.create_table(
            output_file=os.path.join(outputs_dir, "bid_{t}.tsv".format(t=tag)),
            headings=(
                "bid_num", "load_zone", "timeseries", "timepoint",
                "marginal_cost", "price", "bid_load", "wtp",
                "base_price", "base_load"
            )
        )
    b = m.DR_BID_LIST.last()  # current bid
    util.append_table(m, m.LOAD_ZONES, m.TIMEPOINTS,
        output_file=os.path.join(outputs_dir, "bid_{t}.tsv".format(t=tag)),
        values=lambda m, z, tp: (
            b, z, m.tp_ts[tp], m.tp_timestamp[tp],
            m.prev_marginal_cost[z, tp],
            m.dr_price[b, z, tp],
            m.dr_bid[b, z, tp],
            m.dr_bid_benefit[b, z, m.tp_ts[tp]],
            m.base_data_dict[z, tp][1],
            m.base_data_dict[z, tp][0],
        )
    )

    # store the current bid weights for future reference
    if m.iteration_number == 0:
        util.create_table(
            output_file=os.path.join(outputs_dir,
                                     "bid_weights_{t}.tsv".format(t=tag)),
            headings=("iteration", "load_zone", "timeseries", "bid_num",
                      "weight")
        )
    util.append_table(m, m.LOAD_ZONES, m.TIMESERIES, m.DR_BID_LIST,
        output_file=os.path.join(outputs_dir,
                                 "bid_weights_{t}.tsv".format(t=tag)),
        values=lambda m, z, ts, b: (len(m.DR_BID_LIST), z, ts, b,
                                    m.DRBidWeight[b, z, ts])
    )

    # report the dual costs
    write_dual_costs(m)

    # if m.iteration_number % 5 == 0:
    #     # save time by only writing results every 5 iterations
    #     write_results(m)
    write_results(m)
    write_batch_results(m)
def update_demand(m):
    """
    This should be called after solving the model, in order to calculate
    new bids to include in future runs. The first time through, it also
    uses the fixed demand and marginal costs to calibrate the demand
    system, and then replaces the fixed demand with the flexible demand
    system.
    """
    # first run is detected by the calibration data not having been set yet
    first_run = (m.base_data is None)

    outputs_dir = m.options.outputs_dir
    tag = m.options.scenario_name

    print "attaching new demand bid to model"
    if first_run:
        calibrate_model(m)
        # start the bid-weights log with just its header row
        util.create_table(
            output_file=os.path.join(outputs_dir,
                                     "bid_weights_{t}.tsv".format(t=tag)),
            headings=("iteration", "load_zone", "timeseries", "bid_num",
                      "weight")
        )
    else:   # not first run
        # print "m.DRBidWeight (first day):"
        # print [(b, lz, ts, value(m.DRBidWeight[b, lz, ts]))
        #     for b in m.DR_BID_LIST
        #     for lz in m.LOAD_ZONES
        #     for ts in m.TIMESERIES]
        print "m.DRBidWeight:"
        pprint([(lz, ts, [(b, value(m.DRBidWeight[b, lz, ts]))
                          for b in m.DR_BID_LIST])
                for lz in m.LOAD_ZONES
                for ts in m.TIMESERIES])
        #print "DR_Convex_Bid_Weight:"
        #m.DR_Convex_Bid_Weight.pprint()

        # store the current bid weights for future reference
        # This should be done before adding the new bid.
        # NOTE(review): original indentation was lost in this file; this
        # call is placed inside the else-branch because on the first run
        # there are no weights to record -- confirm against upstream.
        util.append_table(m, m.LOAD_ZONES, m.TIMESERIES, m.DR_BID_LIST,
            output_file=os.path.join(outputs_dir,
                                     "bid_weights_{t}.tsv".format(t=tag)),
            values=lambda m, lz, ts, b: (len(m.DR_BID_LIST), lz, ts, b,
                                         m.DRBidWeight[b, lz, ts])
        )

    # get new bids from the demand system at the current prices
    bids = get_bids(m)

    print "adding bids to model"
    # print "first day (lz, ts, prices, demand, wtp) ="
    # pprint(bids[0])

    # add the new bids to the model
    add_bids(m, bids)
    print "m.dr_bid_benefit (first day):"
    pprint([(b, lz, ts, value(m.dr_bid_benefit[b, lz, ts]))
            for b in m.DR_BID_LIST
            for lz in m.LOAD_ZONES
            for ts in [m.TIMESERIES.first()]])

    # print "m.dr_bid (first day):"
    # print [(b, lz, ts, value(m.dr_bid[b, lz, ts]))
    #     for b in m.DR_BID_LIST
    #     for lz in m.LOAD_ZONES
    #     for ts in m.TS_TPS[m.TIMESERIES.first()]]

    if first_run:
        # replace lz_demand_mw with FlexibleDemand in the energy balance constraint
        # note: it is easiest to do this after retrieving the bids because this
        # destroys the dual values which are needed for calculating the bids
        # note: the first two lines are simpler than the method I use, but my approach
        # preserves the ordering of the list, which is nice for reporting.
        # m.LZ_Energy_Components_Consume.remove('lz_demand_mw')
        # m.LZ_Energy_Components_Consume.append('FlexibleDemand')
        ecc = m.LZ_Energy_Components_Consume
        ecc[ecc.index('lz_demand_mw')] = 'FlexibleDemand'
        reconstruct_energy_balance(m)
def add_bids(m, bids):
    """
    accept a list of bids written as tuples like (lz, ts, prices, demand, wtp)
    where lz is the load zone, ts is the timeseries, demand is a list of
    demand levels for the timepoints during that series, and wtp is the
    private benefit from consuming the amount of power in that bid. Then
    add that set of bids to the model
    """
    # create a bid ID and add it to the list of bids
    if len(m.DR_BID_LIST) == 0:
        b = 1
    else:
        b = max(m.DR_BID_LIST) + 1

    tag = m.options.scenario_name
    outputs_dir = m.options.outputs_dir

    m.DR_BID_LIST.add(b)
    # m.DR_BIDS_LZ_TP.reconstruct()
    # m.DR_BIDS_LZ_TS.reconstruct()
    # add the bids for each load zone and timepoint to the dr_bid list
    for (lz, ts, prices, demand, wtp) in bids:
        # record the private benefit
        m.dr_bid_benefit[b, lz, ts] = wtp
        # record the level of demand for each timepoint
        timepoints = m.TS_TPS[ts]
        # print "ts: "+str(ts)
        # print "demand: " + str(demand)
        # print "timepoints: " + str([t for t in timepoints])
        for i, d in enumerate(demand):
            # print "i+1: "+str(i+1)
            # print "d: "+str(d)
            # print "timepoints[i+1]: "+str(timepoints[i+1])
            # note: demand is a python list or array, which uses 0-based
            # indexing, but timepoints is a pyomo set, which uses 1-based
            # indexing, so we have to shift the index by 1.
            m.dr_bid[b, lz, timepoints[i + 1]] = d
            m.dr_price[b, lz, timepoints[i + 1]] = prices[i]

    print "len(m.DR_BID_LIST): {l}".format(l=len(m.DR_BID_LIST))
    print "m.DR_BID_LIST: {b}".format(b=[x for x in m.DR_BID_LIST])

    # store bid information for later reference
    # this has to be done after the model is updated and
    # before DRBidWeight is reconstructed (which destroys the duals)
    if b == 1:
        # first bid ever: create the log file with its header row
        util.create_table(
            output_file=os.path.join(outputs_dir, "bid_{t}.tsv".format(t=tag)),
            headings=("bid_num", "load_zone", "timeseries", "timepoint",
                      "marginal_cost", "price", "bid_load", "wtp",
                      "base_price", "base_load"))
    util.append_table(m, m.LOAD_ZONES, m.TIMEPOINTS,
                      output_file=os.path.join(
                          outputs_dir, "bid_{t}.tsv".format(t=tag)),
                      values=lambda m, lz, tp: (
                          b, lz, m.tp_ts[tp], m.tp_timestamp[tp],
                          electricity_marginal_cost(m, lz, tp),
                          m.dr_price[max(m.DR_BID_LIST), lz, tp],
                          m.dr_bid[max(m.DR_BID_LIST), lz, tp],
                          m.dr_bid_benefit[b, lz, m.tp_ts[tp]],
                          m.base_data_dict[lz, tp][1],
                          m.base_data_dict[lz, tp][0],
                      ))
    write_results(m)
    write_batch_results(m)

    # reconstruct the components that depend on m.DR_BID_LIST,
    # m.dr_bid_benefit and m.dr_bid
    m.DRBidWeight.reconstruct()
    m.DR_Convex_Bid_Weight.reconstruct()
    m.FlexibleDemand.reconstruct()
    m.DR_Welfare_Cost.reconstruct()
    # it seems like we have to reconstruct the higher-level components that
    # depend on these ones (even though these are Expressions), because
    # otherwise they refer to objects that used to be returned by the
    # Expression but aren't any more (e.g., versions of DRBidWeight that no
    # longer exist in the model).
    # (i.e., Energy_Balance refers to the items returned by FlexibleDemand
    # instead of referring to FlexibleDemand itself)
    reconstruct_energy_balance(m)
    m.SystemCostPerPeriod.reconstruct()
    m.SystemCost.reconstruct()
def add_bids(m, bids):
    """
    accept a list of bids written as tuples like (lz, ts, prices, demand, wtp)
    where lz is the load zone, ts is the timeseries, demand is a list of
    demand levels for the timepoints during that series, and wtp is the
    private benefit from consuming the amount of power in that bid. Then
    add that set of bids to the model
    """
    # create a bid ID and add it to the list of bids
    if len(m.DR_BID_LIST) == 0:
        b = 1
    else:
        b = max(m.DR_BID_LIST) + 1

    tag = m.options.scenario_name
    outputs_dir = m.options.outputs_dir

    m.DR_BID_LIST.add(b)
    # m.DR_BIDS_LZ_TP.reconstruct()
    # m.DR_BIDS_LZ_TS.reconstruct()
    # add the bids for each load zone and timepoint to the dr_bid list
    for (lz, ts, prices, demand, wtp) in bids:
        # record the private benefit
        m.dr_bid_benefit[b, lz, ts] = wtp
        # record the level of demand for each timepoint
        timepoints = m.TS_TPS[ts]
        # print "ts: "+str(ts)
        # print "demand: " + str(demand)
        # print "timepoints: " + str([t for t in timepoints])
        for i, d in enumerate(demand):
            # print "i+1: "+str(i+1)
            # print "d: "+str(d)
            # print "timepoints[i+1]: "+str(timepoints[i+1])
            # note: demand is a python list or array, which uses 0-based
            # indexing, but timepoints is a pyomo set, which uses 1-based
            # indexing, so we have to shift the index by 1.
            m.dr_bid[b, lz, timepoints[i+1]] = d
            m.dr_price[b, lz, timepoints[i+1]] = prices[i]

    print "len(m.DR_BID_LIST): {l}".format(l=len(m.DR_BID_LIST))
    print "m.DR_BID_LIST: {b}".format(b=[x for x in m.DR_BID_LIST])

    # store bid information for later reference
    # this has to be done after the model is updated and
    # before DRBidWeight is reconstructed (which destroys the duals)
    if b == 1:
        # first bid ever: create the log file with its header row
        util.create_table(
            output_file=os.path.join(outputs_dir, "bid_{t}.tsv".format(t=tag)),
            headings=(
                "bid_num", "load_zone", "timeseries", "timepoint",
                "marginal_cost", "price", "bid_load", "wtp",
                "base_price", "base_load"
            )
        )
    util.append_table(m, m.LOAD_ZONES, m.TIMEPOINTS,
        output_file=os.path.join(outputs_dir, "bid_{t}.tsv".format(t=tag)),
        values=lambda m, lz, tp: (
            b, lz, m.tp_ts[tp], m.tp_timestamp[tp],
            electricity_marginal_cost(m, lz, tp),
            m.dr_price[max(m.DR_BID_LIST), lz, tp],
            m.dr_bid[max(m.DR_BID_LIST), lz, tp],
            m.dr_bid_benefit[b, lz, m.tp_ts[tp]],
            m.base_data_dict[lz, tp][1],
            m.base_data_dict[lz, tp][0],
        )
    )
    write_results(m)
    write_batch_results(m)

    # reconstruct the components that depend on m.DR_BID_LIST,
    # m.dr_bid_benefit and m.dr_bid
    m.DRBidWeight.reconstruct()
    m.DR_Convex_Bid_Weight.reconstruct()
    m.FlexibleDemand.reconstruct()
    m.DR_Welfare_Cost.reconstruct()
    # it seems like we have to reconstruct the higher-level components that
    # depend on these ones (even though these are Expressions), because
    # otherwise they refer to objects that used to be returned by the
    # Expression but aren't any more (e.g., versions of DRBidWeight that no
    # longer exist in the model).
    # (i.e., Energy_Balance refers to the items returned by FlexibleDemand
    # instead of referring to FlexibleDemand itself)
    reconstruct_energy_balance(m)
    m.SystemCostPerPeriod.reconstruct()
    m.SystemCost.reconstruct()
how='any') # Sort df_water = df_water.sort_values(by=[util.SERVICE_ID, util.CURRENT_DATE]) # Optionally remove detail if args.summary: df_water = summarize(df_water) # Calculate usage df_water[util.CU_FT] = (df_water[util.CURRENT_READING] - df_water[util.PRIOR_READING]).astype(int) df_water[util.ELAPSED_DAYS] = ( df_water[util.CURRENT_DATE] - df_water[util.PRIOR_DATE]).astype('timedelta64[D]').astype(int) df_water = df_water[df_water[util.ELAPSED_DAYS] > 0] df_water[util.CU_FT_PER_DAY] = round( df_water[util.CU_FT] / df_water[util.ELAPSED_DAYS], 1) df_water[util.GAL_PER_DAY] = round(df_water[util.CU_FT_PER_DAY] * util.GAL_PER_CU_FT).astype(int) # Save result to database util.create_table( ('WaterCustomers' if args.summary else 'WaterConsumption'), conn, cur, df=df_water) # Report elapsed time util.report_elapsed_time()
print "%d posts fetched" % len(posts) next_page_link = find_next_page_url(page) if next_page_link is not None: return fetch(urlparse.urljoin(url, next_page_link)) else: return else: return def find_all_posts(content): pattern = re.compile("http://[^.]+.tumblr.com/post/\d+[^\"]", re.IGNORECASE | re.MULTILINE) matches = pattern.findall(content) return matches def find_next_page_url(content): pattern = re.compile("(/archive\?before_time=\d+)", re.IGNORECASE | re.MULTILINE) match = pattern.search(content) if match is not None: return match.group(1) return None if __name__ == "__main__": if len(sys.argv) == 2: util.create_table() run(sys.argv[1]) else: print "invalid request, sample: python spider.py massimo"
def update_demand(m, tag):
    """
    This should be called after solving the model, in order to calculate
    new bids to include in future runs. The first time through, it also
    uses the fixed demand and marginal costs to calibrate the demand
    system, and then replaces the fixed demand with the flexible demand
    system.
    """
    global baseData
    # first run is detected by the calibration data not having been set yet
    first_run = (baseData is None)
    if first_run:
        # first time through, calibrate the demand system and add it to the model
        # baseData consists of a list of tuples showing (load_zone,
        # timeseries, baseLoad (list) and basePrice)
        # note: the constructor below assumes list comprehensions will
        # preserve the order of the underlying list (which is guaranteed
        # according to http://stackoverflow.com/questions/1286167/is-the-order-of-results-coming-from-a-list-comprehension-guaranteed)
        # calculate the average-cost price for the current study period
        # TODO: find basePrice for each period that corresponds to the
        # assumptions used in the base-case load forecast
        # TODO: add in something for the fixed costs, to make marginal cost
        # commensurate with the basePrice
        # for now, we just use a flat price roughly equal to backstop generation.
        #baseCosts = [m.dual[m.EnergyBalance[lz, tp]] for lz in m.LOAD_ZONES for tp in m.TIMEPOINTS]
        basePrice = 110  # average-cost price ($/MWh)
        baseData = [(
            lz,
            ts,
            [m.lz_demand_mw[lz, tp] for tp in m.TS_TPS[ts]],
            basePrice
        ) for lz in m.LOAD_ZONES for ts in m.TIMESERIES]
        # start the bid-weights log with just its header row
        util.create_table(
            output_file=os.path.join("outputs",
                                     "bid_weights_{t}.txt".format(t=tag)),
            headings=("iteration", "load_zone", "timeseries", "bid_num",
                      "weight")
        )
    else:
        # print "m.DRBidWeight (first day):"
        # print [(b, lz, ts, value(m.DRBidWeight[b, lz, ts]))
        #     for b in m.DR_BID_LIST
        #     for lz in m.LOAD_ZONES
        #     for ts in m.TIMESERIES]
        print "m.DRBidWeight:"
        pprint([(lz, ts, [(b, value(m.DRBidWeight[b, lz, ts]))
                          for b in m.DR_BID_LIST])
                for lz in m.LOAD_ZONES
                for ts in m.TIMESERIES])
        #print "DR_Convex_Bid_Weight:"
        #m.DR_Convex_Bid_Weight.pprint()

        # store the current bid weights for future reference
        # This should be done before adding the new bid.
        # NOTE(review): original indentation was lost in this file; this
        # call is placed inside the else-branch because on the first run
        # there are no weights to record -- confirm against upstream.
        util.append_table(m, m.LOAD_ZONES, m.TIMESERIES, m.DR_BID_LIST,
            output_file=os.path.join("outputs",
                                     "bid_weights_{t}.txt".format(t=tag)),
            values=lambda m, lz, ts, b: (len(m.DR_BID_LIST), lz, ts, b,
                                         m.DRBidWeight[b, lz, ts])
        )

    # get new demand bids at the current marginal costs
    bids = []
    for i, (lz, ts, baseLoad, basePrice) in enumerate(baseData):
        # TODO: add in something for the fixed costs
        # duals of the energy balance constraint give marginal costs;
        # rescale from base-year terms back to per-timepoint prices
        prices = [m.dual[m.Energy_Balance[lz, tp]]/m.bring_timepoint_costs_to_base_year[tp]
                  for tp in m.TS_TPS[ts]]
        # set a floor on prices to avoid division-by-zero in the CES functions
        prices = [max(0.0001, p) for p in prices]
        # if i < 2:
        #     print "prices (day {i}): {p}".format(i=i, p=prices)
        #     print "weights: {w}".format(w=[m.bring_timepoint_costs_to_base_year[tp] for tp in m.TS_TPS[ts]])
        # the scenario tag selects which demand system computes the bid
        if '_ce_' in tag:
            (demand, wtp) = ce.bid(prices, baseLoad, basePrice)
        else:
            demand = ces.double_ces(prices, Theta, baseLoad, basePrice)
            wtp = ces.wtp(prices, Theta, baseLoad, basePrice)
        bids.append((lz, ts, prices, demand, wtp))
        # if i < 2:
        #     import pdb; pdb.set_trace()

    print "adding bids to model; first day="
    pprint(bids[0])

    # add the new bids to the model
    add_bids(m, bids, tag)
    print "m.dr_bid_benefit (first day):"
    pprint([(b, lz, ts, value(m.dr_bid_benefit[b, lz, ts]))
            for b in m.DR_BID_LIST
            for lz in m.LOAD_ZONES
            for ts in [m.TIMESERIES.first()]])

    # print "m.dr_bid (first day):"
    # print [(b, lz, ts, value(m.dr_bid[b, lz, ts]))
    #     for b in m.DR_BID_LIST
    #     for lz in m.LOAD_ZONES
    #     for ts in m.TS_TPS[m.TIMESERIES.first()]]

    if first_run:
        # add FlexibleDemand to the energy balance constraint and remove
        # lz_demand_mw_as_consumption
        # note: it is easiest to do this after retrieving the bids because
        # this destroys the dual values which are needed for calculating
        # the bids
        m.LZ_Energy_Balance_components.remove('lz_demand_mw_as_consumption')
        m.LZ_Energy_Balance_components.append('FlexibleDemand')
        m.Energy_Balance.reconstruct()
# Mark participation of residents in general elections mark_who_voted_when() # Add water consumption statistics add_water_data() # Sort df_residents = df_residents.sort_values( by=[util.NORMALIZED_STREET_NAME, util.NORMALIZED_STREET_NUMBER] ) # Drop columns not wanted in database drop_columns = [ util.DATE_OF_BIRTH, util.LEGAL_REFERENCE_SALE_DATE, util.NAL_DESCRIPTION, util.PREVIOUS_LEGAL_REFERENCE_SALE_DATE, util.PREVIOUS_NAL_DESCRIPTION ] df_residents = df_residents.drop( columns=drop_columns ) # Optionally drop debug columns if not args.debug: debug_columns = [ ] df_residents = df_residents.drop( columns=debug_columns ) # Save result to database util.create_table( 'Residents', conn, cur, df=df_residents ) # Report elapsed time util.report_elapsed_time()
def add_bids(m, bids, tag):
    """
    Accept a list of bids written as tuples like (lz, ts, prices, demand, wtp)
    where lz is the load zone, ts is the timeseries,
    demand is a list of demand levels for the timepoints during that series,
    and wtp is the private benefit from consuming the amount of power in that bid.
    Then add that set of bids to the model.

    Side effects: appends a new bid ID to m.DR_BID_LIST, fills in
    m.dr_bid_benefit and m.dr_bid for that ID, logs the bid to
    outputs/bid_<tag>.txt, and reconstructs every model component that
    depends on the bid list. Must be called while m.dual still holds the
    duals from the last solve (they are read in the logging lambda below
    and destroyed by the reconstruct calls at the end).
    """
    # create a bid ID and add it to the list of bids
    # (IDs are 1-based and strictly increasing)
    if len(m.DR_BID_LIST) == 0:
        b = 1
    else:
        b = max(m.DR_BID_LIST) + 1

    # sometimes the demand side reports a strangely high willingness to pay for a particular bundle.
    # then, when prices change, it chooses other bundles, but reports a lower willingness to pay
    # for them than for the earlier bundle. This suggests that wtp for the earlier bundle is
    # overstated. So here we go back and reduce wtp for some earlier bundles based on the fact that
    # they were not selected at the current prices (so they must actually have a lower wtp than the
    # current bundle). This has the effect of gradually forgetting older bids that aren't re-offered
    # as the model converges toward final prices.
    # for (lz, ts, prices, demand, wtp) in bids:
    #     cs_new = wtp - sum_product(prices, demand)
    #     for bid in m.DR_BID_LIST:
    #         cs_old = value(m.dr_bid_benefit[bid, lz, ts]) \
    #             - sum_product(prices, [value(m.dr_bid[bid, lz, tp]) for tp in m.TS_TPS[ts]])
    #         if cs_old > cs_new:
    #             # the old bid is reported to have higher consumer surplus at the current prices
    #             # than the new bid.
    #             # this shouldn't happen, but it does.
    #             # reduce implied consumer surplus for the old bid so it is no more than the cs for the new bid
    #             # at the current prices.
    #             if 'drop_bad_bids' in tag:
    #                 print "dropping bid {b} from model because wtp is too high.".format(b=(bid, lz, ts))
    #                 m.dr_bid_benefit[bid, lz, ts] -= (cs_old - cs_new + 1e7)
    #             if 'adj_bad_bids' in tag:
    #                 print "reducing wtp for bid {b} by ${adj}".format(b=(bid, lz, ts), adj=cs_old-cs_new)
    #                 m.dr_bid_benefit[bid, lz, ts] -= (cs_old - cs_new)

    m.DR_BID_LIST.add(b)
    # m.DR_BIDS_LZ_TP.reconstruct()
    # m.DR_BIDS_LZ_TS.reconstruct()
    # add the bids for each load zone and timepoint to the dr_bid list
    for (lz, ts, prices, demand, wtp) in bids:
        # record the private benefit
        m.dr_bid_benefit[b, lz, ts] = wtp
        # record the level of demand for each timepoint
        timepoints = m.TS_TPS[ts]
        # print "ts: "+str(ts)
        # print "demand: " + str(demand)
        # print "timepoints: " + str([t for t in timepoints])
        for i, d in enumerate(demand):
            # print "i+1: "+str(i+1)
            # print "d: "+str(d)
            # print "timepoints[i+1]: "+str(timepoints[i+1])
            # note: demand is a python list or array, which uses 0-based indexing, but
            # timepoints is a pyomo set, which uses 1-based indexing, so we have to shift the index by 1.
            m.dr_bid[b, lz, timepoints[i+1]] = d

    print "len(m.DR_BID_LIST): {l}".format(l=len(m.DR_BID_LIST))
    print "m.DR_BID_LIST: {b}".format(b=[x for x in m.DR_BID_LIST])

    # store bid information for later reference
    # this has to be done after the model is updated and
    # before DRBidWeight is reconstructed (which destroys the duals)
    if b == 1:
        # first bid ever: create the log file with its header row
        util.create_table(
            output_file=os.path.join("outputs", "bid_{t}.txt".format(t=tag)),
            headings=("bid_num", "load_zone", "timepoint_label", "marginal_cost", "bid", "wtp")
        )
    # NOTE(review): max(m.DR_BID_LIST) in the values lambda below should equal b
    # (b was just added and is the largest ID); using b directly would be clearer.
    util.append_table(m, m.LOAD_ZONES, m.TIMEPOINTS,
        output_file=os.path.join("outputs", "bid_{t}.txt".format(t=tag)),
        values=lambda m, lz, tp: (
            b,
            lz,
            m.tp_timestamp[tp],
            # marginal cost: energy-balance dual, undiscounted back from base-year terms
            m.dual[m.Energy_Balance[lz, tp]]/m.bring_timepoint_costs_to_base_year[tp],
            m.dr_bid[max(m.DR_BID_LIST), lz, tp],
            m.dr_bid_benefit[b, lz, m.tp_ts[tp]]
        )
    )

    # reconstruct the components that depend on m.DR_BID_LIST, m.dr_bid_benefit and m.dr_bid
    m.DRBidWeight.reconstruct()
    m.DR_Convex_Bid_Weight.reconstruct()
    m.FlexibleDemand.reconstruct()
    m.DR_Welfare_Cost.reconstruct()
    # it seems like we have to reconstruct the higher-level components that depend on these
    # ones (even though these are Expressions), because otherwise they refer to objects that
    # used to be returned by the Expression but aren't any more (e.g., versions of DRBidWeight
    # that no longer exist in the model).
    # (i.e., Energy_Balance refers to the items returned by FlexibleDemand instead of referring
    # to FlexibleDemand itself)
    m.Energy_Balance.reconstruct()
    m.SystemCostPerPeriod.reconstruct()
    m.Minimize_System_Cost.reconstruct()  # may not be needed, since it seems to store the rule
def update_demand(m):
    """
    This should be called after solving the model, in order to calculate new bids
    to include in future runs. The first time through, it also uses the fixed demand
    and marginal costs to calibrate the demand system, and then replaces the fixed
    demand with the flexible demand system.

    Side effects: on the first run, calibrates the demand model and creates
    outputs/bid_weights_<tag>.tsv; on later runs, appends the current bid
    weights to that file. Always fetches new bids via get_bids() and adds
    them to the model via add_bids().
    """
    # first run is detected by the demand system not having been calibrated yet
    first_run = (m.base_data is None)
    outputs_dir = m.options.outputs_dir
    tag = m.options.scenario_name

    print "attaching new demand bid to model"
    if first_run:
        calibrate_model(m)
        # create the bid-weights log with just its header row
        util.create_table(output_file=os.path.join(
            outputs_dir, "bid_weights_{t}.tsv".format(t=tag)),
            headings=("iteration", "load_zone", "timeseries", "bid_num", "weight"))
    else:  # not first run
        # print "m.DRBidWeight (first day):"
        # print [(b, lz, ts, value(m.DRBidWeight[b, lz, ts]))
        #     for b in m.DR_BID_LIST
        #     for lz in m.LOAD_ZONES
        #     for ts in m.TIMESERIES]
        print "m.DRBidWeight:"
        pprint([(lz, ts, [(b, value(m.DRBidWeight[b, lz, ts])) for b in m.DR_BID_LIST])
            for lz in m.LOAD_ZONES
            for ts in m.TIMESERIES])
        #print "DR_Convex_Bid_Weight:"
        #m.DR_Convex_Bid_Weight.pprint()
        # store the current bid weights for future reference
        # This should be done before adding the new bid.
        # (len(m.DR_BID_LIST) serves as the iteration number)
        util.append_table(
            m, m.LOAD_ZONES, m.TIMESERIES, m.DR_BID_LIST,
            output_file=os.path.join(outputs_dir, "bid_weights_{t}.tsv".format(t=tag)),
            values=lambda m, lz, ts, b: (len(m.DR_BID_LIST), lz, ts, b, m.DRBidWeight[b, lz, ts]))

    # get new bids from the demand system at the current prices
    bids = get_bids(m)

    print "adding bids to model"
    # print "first day (lz, ts, prices, demand, wtp) ="
    # pprint(bids[0])
    # add the new bids to the model
    # NOTE(review): called with two arguments here; confirm the add_bids in
    # scope matches — another variant in this codebase takes (m, bids, tag).
    add_bids(m, bids)
    print "m.dr_bid_benefit (first day):"
    pprint([(b, lz, ts, value(m.dr_bid_benefit[b, lz, ts]))
        for b in m.DR_BID_LIST
        for lz in m.LOAD_ZONES
        for ts in [m.TIMESERIES.first()]])
    # print "m.dr_bid (first day):"
    # print [(b, lz, ts, value(m.dr_bid[b, lz, ts]))
    #     for b in m.DR_BID_LIST
    #     for lz in m.LOAD_ZONES
    #     for ts in m.TS_TPS[m.TIMESERIES.first()]]

    if first_run:
        # replace lz_demand_mw with FlexibleDemand in the energy balance constraint
        # note: it is easiest to do this after retrieving the bids because this
        # destroys the dual values which are needed for calculating the bids
        # note: the first two lines are simpler than the method I use, but my approach
        # preserves the ordering of the list, which is nice for reporting.
        # m.LZ_Energy_Components_Consume.remove('lz_demand_mw')
        # m.LZ_Energy_Components_Consume.append('FlexibleDemand')
        ecc = m.LZ_Energy_Components_Consume
        ecc[ecc.index('lz_demand_mw')] = 'FlexibleDemand'
        reconstruct_energy_balance(m)
def copy_data(csv_file, headers):
    """Insert every data row of csv_file into the database.

    Rows that fail to insert are collected in the module-level ``errors``
    list instead of aborting the load, so the ``__main__`` block below can
    report them and write problem_lines.csv after all files are processed.
    (The previous version re-raised on the first bad row, which made that
    reporting code unreachable.)

    Args:
        csv_file: path of the CSV file to load; its first line (the header
            row) is skipped.
        headers: column names used as the DictReader fieldnames.
    """
    global errors, total
    with open(csv_file, 'r') as f:
        next(f)  # skip the header row; fieldnames come from `headers` instead
        csvreader = csv.DictReader(f, fieldnames=headers)
        for row in csvreader:
            try:
                insert_row(row)
                conn.commit()
                total += 1
            except Exception:
                # Record the bad row and keep going so the run completes
                # and the error report below can be produced.
                errors.append(row)
                # Clear the aborted transaction so subsequent inserts can
                # succeed (required by drivers such as psycopg2, where an
                # error leaves the connection in a failed-transaction state).
                conn.rollback()


if __name__ == "__main__":
    util.create_table(cur, 'pluto')
    for csv_file in csv_file_list(csv_dir):
        print('processing ' + csv_file)
        copy_data(csv_file, headers)
    print('total inserted: ' + str(total))
    print('lines with errors: ' + str(len(errors)))
    if len(errors) > 0:
        # Save the failed rows so they can be inspected and re-loaded.
        with open('problem_lines.csv', 'w') as f:
            for line in errors:
                f.write(str(line) + "\n")
        print('problem_lines.csv saved!')