def calculate_d_payback(self):
    """Build the cumulative demand-side payback table.

    Annual demand-side costs are divided by sales, each subsector's rows are
    tagged with that subsector's stock unit, the ``year`` level is replaced by a
    1-based ``lifetime_year`` (year - vintage + 1) and the costs are accumulated
    along it.  The cleaned result is stored on ``self.demand.outputs.d_payback``.
    """
    currency_label = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
    value_column = currency_label.upper()
    first_vintage = min(cfg.supply_years)

    # NOTE: this is the very frame held by self.demand, so the column rename
    # below is applied to that shared object in place.
    annual_costs = self.demand.d_annual_costs_payback
    annual_costs.columns = ['value']
    annual_costs = annual_costs[annual_costs.index.get_level_values('vintage') >= first_vintage]
    annual_costs = annual_costs[annual_costs.index.get_level_values('year') >= first_vintage]

    # Work on a deep copy of the sales output; the index-name swap is done in
    # place on that copy, after which it is filtered on its 'vintage' level.
    sales = copy.deepcopy(self.demand.outputs.d_sales)
    util.replace_index_name(sales, 'vintage', 'year')
    sales = sales[sales.index.get_level_values('vintage') >= first_vintage]
    sales = util.add_and_set_index(sales, 'year', cfg.supply_years)
    sales.index = sales.index.reorder_levels(annual_costs.index.names)
    sales = sales.reindex(annual_costs.index).sort_index()

    payback = util.DfOper.divi([annual_costs, sales])
    payback = payback[np.isfinite(payback.values)]
    payback = payback.replace([np.inf, np.nan], 0)

    # Label rows with the stock unit of the subsector they belong to.
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if not hasattr(subsector, 'stock') or subsector.sub_type == 'link':
                continue
            rows = util.level_specific_indexer(payback, 'subsector', subsector.id)
            payback.loc[rows, 'unit'] = subsector.stock.unit.upper()
    payback = payback.set_index('unit', append=True)
    payback.columns = [value_column]

    # Swap the calendar year for the year-of-life and accumulate along it.
    years = payback.index.get_level_values('year')
    vintages = payback.index.get_level_values('vintage')
    payback['lifetime_year'] = years - vintages + 1
    payback = payback.set_index('lifetime_year', append=True)
    payback = util.remove_df_levels(payback, 'year')
    group_levels = [level for level in payback.index.names if level != 'lifetime_year']
    payback = payback.groupby(level=group_levels).transform(lambda frame: frame.cumsum())
    payback = payback[payback[value_column] != 0]

    # return_cleaned_output looks the frame up by attribute name, so it has to
    # be stored before the call.
    self.demand.outputs.d_payback = payback
    self.demand.outputs.d_payback = self.demand.outputs.return_cleaned_output('d_payback')
def calc_and_format_embodied_costs(self):
    """Return the embodied supply cost frames formatted for the combined outputs.

    Every frame in ``self.demand.outputs.demand_embodied_energy_costs`` is cleaned,
    its column is renamed to the upper-cased "<currency_year> <currency_name>"
    label, and the index levels EXPORT/DOMESTIC="DOMESTIC" and
    SUPPLY/DEMAND="SUPPLY" are added.
    """
    cleaned_frames = [Output.clean_df(frame) for frame in self.demand.outputs.demand_embodied_energy_costs]
    currency_label = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
    for frame in cleaned_frames:
        frame.columns = [currency_label.upper()]
    return [
        util.add_to_df_index(frame, names=['EXPORT/DOMESTIC', "SUPPLY/DEMAND"], keys=["DOMESTIC", "SUPPLY"])
        for frame in cleaned_frames
    ]
def calculate_tco(self):
    """Build the total-cost-of-ownership output table.

    Supply-side (embodied energy) and demand-side (levelized) costs are each
    divided by service demand, stacked under a ``COST TYPE`` level, floored at
    zero, tagged with each subsector's service-demand unit and stored, after
    cleaning, on ``self.outputs.c_tco``.
    """
    value_column = (cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')).upper()
    first_vintage = min(cfg.supply_years)

    def from_first_vintage(frame):
        # keep only rows whose vintage is within the supply years
        return frame[frame.index.get_level_values('vintage') >= first_vintage]

    supply_costs = from_first_vintage(self.demand.outputs.demand_embodied_energy_costs_tco)

    # NOTE: the column rename is applied to the frame held by self.demand itself.
    demand_costs = self.demand.d_levelized_costs_tco
    demand_costs.columns = ['value']
    demand_costs = from_first_vintage(demand_costs)

    service_demand = from_first_vintage(self.demand.d_service_demand_tco)

    per_service_costs = [
        util.DfOper.divi([costs, util.remove_df_levels(service_demand, 'unit')])
        for costs in (supply_costs, demand_costs)
    ]
    tco = pd.concat(per_service_costs, keys=['SUPPLY-SIDE', 'DEMAND-SIDE'], names=['COST TYPE'])
    tco = tco.replace([np.inf, np.nan], 0)
    tco[tco < 0] = 0

    # Label rows with the service-demand unit of the subsector they belong to.
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if not (hasattr(subsector, 'service_demand') and hasattr(subsector, 'stock')):
                continue
            rows = util.level_specific_indexer(tco, 'subsector', subsector.id)
            tco.loc[rows, 'unit'] = subsector.service_demand.unit.upper()
    tco = tco.set_index('unit', append=True)
    tco.columns = [value_column]
    tco = tco[tco[value_column] != 0]

    # return_cleaned_output looks the frame up by attribute name, so it has to
    # be stored before the call.
    self.outputs.c_tco = tco
    self.outputs.c_tco = self.outputs.return_cleaned_output('c_tco')
def calc_and_format_export_costs(self):
    """Return the export costs formatted for the combined outputs, or None.

    Returns None when ``self.supply.export_costs`` is None.  Otherwise a copy of
    the export costs is geo-mapped from the supply primary geography to the
    combined outputs geography, cleaned, given the EXPORT/DOMESTIC="EXPORT" and
    SUPPLY/DEMAND="SUPPLY" index levels, and its column is renamed to the
    upper-cased "<currency_year> <currency_name>" label.
    """
    raw_costs = self.supply.export_costs
    if raw_costs is None:
        return None

    mapped = GeoMapper.geo_map(
        raw_costs.copy(),
        GeoMapper.supply_primary_geography,
        GeoMapper.combined_outputs_geography,
        'total')
    formatted = Output.clean_df(mapped)
    # in-place rename of an index level on the cleaned frame
    util.replace_index_name(formatted, 'FINAL_ENERGY', 'SUPPLY_NODE_EXPORT')
    formatted = util.add_to_df_index(
        formatted,
        names=['EXPORT/DOMESTIC', "SUPPLY/DEMAND"],
        keys=["EXPORT", "SUPPLY"])

    currency_label = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
    formatted.columns = [currency_label.upper()]
    return formatted
def calculate_combined_emissions_results(self):
    """Assemble exported, supply-side and demand-side emissions into one list.

    Each frame gets an ``EMISSIONS_TYPE`` index level (EXPORTED / SUPPLY_SIDE /
    DEMAND_SIDE).  Export and direct frames are padded with "N/A" for any index
    level that the first embodied frame has and they lack, so that all frames
    share the same levels.  Geography levels are renamed, zero rows dropped, the
    value column is renamed to the configured mass unit and the index levels are
    put in the order of the first embodied frame.  The resulting list is stored
    on ``self.outputs.c_emissions``.

    A missing (None) export result or direct-emissions list is skipped instead
    of being placed in the output list, where it could not be dereferenced by
    the formatting steps below.
    """
    export_emissions = self.calc_and_format_export_emissions()
    embodied_emissions_list = self.calc_and_format_embodied_supply_emissions()
    direct_emissions_list = self.calc_and_format_direct_demand_emissions()

    embodied_emissions_list = [
        util.add_and_set_index(x, ['EMISSIONS_TYPE'], ['SUPPLY_SIDE'])
        for x in embodied_emissions_list
    ]
    # Treat "no direct emissions" as an empty list; the None check has to
    # happen before the list is iterated, not after.
    direct_emissions_list = [
        util.add_and_set_index(x, ['EMISSIONS_TYPE'], ['DEMAND_SIDE'])
        for x in (direct_emissions_list or [])
    ]

    # The first embodied frame defines the target index levels.  Its names are
    # re-read at every use because the renames below may change them in place.
    reference = embodied_emissions_list[0]

    if export_emissions is not None:
        export_emissions = util.add_and_set_index(export_emissions, ['EMISSIONS_TYPE'], ['EXPORTED'])
        for name in [x for x in reference.index.names if x not in export_emissions.index.names]:
            export_emissions[name] = "N/A"
            export_emissions.set_index(name, append=True, inplace=True)
        export_emissions = export_emissions.groupby(level=reference.index.names).sum()

    for df in direct_emissions_list:
        for name in [x for x in reference.index.names if x not in df.index.names]:
            df[name] = "N/A"
            df.set_index(name, append=True, inplace=True)

    combined = embodied_emissions_list + direct_emissions_list
    if export_emissions is not None:
        combined = [export_emissions] + combined

    geography = GeoMapper.combined_outputs_geography.upper()
    combined = [util.replace_index_name(x, geography + '-EMITTED', geography + '_SUPPLY', inplace=True) for x in combined]
    combined = [util.replace_index_name(x, geography + '-CONSUMED', geography, inplace=True) for x in combined]
    combined = [x[x['VALUE'] != 0] for x in combined]

    emissions_unit = cfg.getParam('mass_unit')
    for x in combined:
        x.columns = [emissions_unit.upper()]
        x.index = x.index.reorder_levels([l for l in reference.index.names if l in x.index.names])

    self.outputs.c_emissions = combined
def __init__(self, database_path=None):
    """Load all shape data and build a Shape for every active, available shape.

    Reads the weather years from the config, derives the active date index and
    the time-slice helpers from them, records a hash of the configuration, then
    walks the "Shapes" table: rows whose data slice is missing are logged and
    skipped, inactive rows are ignored, the rest are stored in ``self.data``
    keyed by shape name.  Finally the active shapes are processed.

    Raises:
        ValueError: if the database contains no shape slices at all.
    """
    db = CsvDatabase.get_database(database_path)
    db.shapes.load_all()

    weather_years_param = cfg.getParam('weather_years')
    self.cfg_weather_years = [int(year) for year in weather_years_param.split(',')]
    active_dates = self.get_active_dates(self.cfg_weather_years)
    self.active_dates_index = active_dates
    self.active_dates_index_unique = active_dates.unique()
    self.time_slice_elements = create_time_slice_elements(active_dates)
    self.num_active_years = num_active_years(active_dates)
    self.cfg_hash_tuple = self.get_hash_tuple()
    self.cfg_hash = hash(self.cfg_hash_tuple)

    if not db.shapes.slices.keys():
        raise ValueError(
            "No shapes data found, check path to the database. The folder ShapeData must be located in the database folder specified"
        )

    self.data = {}
    for _, row in db.get_table("Shapes").data.iterrows():
        shape_name = row['name']
        if shape_name not in db.shapes.slices.keys():
            logging.error('Skipping shape {}: cannot find shape data'.format(shape_name))
            continue
        if not row['is_active']:
            continue
        self.data[shape_name] = Shape(
            row,
            db.shapes.get_slice(shape_name),
            self.active_dates_index,
            self.active_dates_index_unique,
            self.time_slice_elements,
            self.num_active_years)

    self.process_active_shapes()
def __init__(self, name, scenario=None):
    """Load a feeder allocation from the database and remap it to the demand geography.

    After the parent initialiser and ``init_from_db`` have run, the raw values
    (when present) are checked to sum to exactly 1 for every (year, geography)
    group, remapped from ``raw_values`` to ``values`` on the configured
    ``demand_primary_geography`` and sorted by index in place.
    """
    super(DispatchFeederAllocation, self).__init__(name, scenario=scenario)
    self.init_from_db(name, scenario)
    # NOTE(review): the source formatting was lost; the remap and sort are
    # assumed to sit inside this guard together with the assert, since both
    # need raw_values to be present - confirm against the original file.
    if self.raw_values is not None:
        # Every (year, geography) group must sum to exactly 1.  This is an exact
        # float comparison, and the check disappears when Python runs with -O.
        assert (self.raw_values.groupby(level=['year', self.geography]).sum() == 1).all().all()
        self.remap(map_from='raw_values', map_to='values', converted_geography=getParam('demand_primary_geography'))
        self.values.sort_index(inplace=True)
def get_hash_tuple(cls):
    """Return a tuple identifying the current geography and weather-year configuration.

    The tuple holds the demand and supply primary geographies, the sorted primary
    subset, the breakout geography, and then each configured weather year as an int.
    """
    weather_years = tuple(int(year) for year in cfg.getParam('weather_years').split(','))
    geography_signature = (
        GeoMapper.demand_primary_geography,
        GeoMapper.supply_primary_geography,
        tuple(sorted(GeoMapper.primary_subset)),
        tuple(GeoMapper.breakout_geography),
    )
    return geography_signature + weather_years
def calculate_combined_cost_results(self):
    """Assemble supply-side, demand-side and exported costs into one list.

    Each frame gets a ``COST_TYPE`` index level (SUPPLY-SIDE / DEMAND-SIDE /
    EXPORTED).  The direct and export frames are padded with "N/A" for any index
    level that the first embodied frame has and they lack, then summed to that
    frame's levels.  Zero rows are dropped and every frame's index is put in the
    level order of the first embodied frame.  The list, ordered embodied frames,
    direct costs, export costs, is stored on ``self.outputs.c_costs``.

    ``calc_and_format_export_costs`` returns None when there are no export
    costs; a None direct or export frame is left out of the list instead of
    being dereferenced by the formatting steps.
    """
    export_costs = self.calc_and_format_export_costs()
    embodied_costs_list = self.calc_and_format_embodied_costs()
    direct_costs = self.calc_and_format_direct_demand_costs()

    embodied_costs_list = [
        util.add_and_set_index(x, ['COST_TYPE'], ['SUPPLY-SIDE'])
        for x in embodied_costs_list
    ]
    # The first embodied frame defines the index levels all frames must share.
    target_levels = embodied_costs_list[0].index.names

    aligned = []
    for cost_type, df in (('DEMAND-SIDE', direct_costs), ('EXPORTED', export_costs)):
        if df is None:
            continue
        df = util.add_and_set_index(df, ['COST_TYPE'], [cost_type])
        for name in [x for x in target_levels if x not in df.index.names]:
            df[name] = "N/A"
            df.set_index(name, append=True, inplace=True)
        aligned.append(df.groupby(level=target_levels).sum())

    combined = embodied_costs_list + aligned
    combined = [x[x.values != 0] for x in combined]
    for x in combined:
        x.index = x.index.reorder_levels(target_levels)

    self.outputs.c_costs = combined
def __init__(self, database_path):
    """Set up the pint registry, config values and conversion tables.

    Caches the calculation energy unit, currency name and currency year from the
    config, loads the "CurrenciesConversion" and "InflationConversion" tables
    indexed and sorted by (currency, year), and registers every definition in
    ``UnitConverter._unit_defs`` whose unit name pint does not already expose.
    """
    # Initiate pint for unit conversions
    self.ureg = pint.UnitRegistry()
    self.cfg_energy_unit = cfg.getParam('calculation_energy_unit')
    self.cfg_currency = cfg.getParam('currency_name')
    self.cfg_currency_year = cfg.getParamAsInt('currency_year')

    db = get_database(database_path)
    table_index = ['currency', 'year']
    currency_data = db.get_table("CurrenciesConversion").data
    self.currency_table = currency_data.set_index(table_index).sort_index()
    inflation_data = db.get_table("InflationConversion").data
    self.inflation_table = inflation_data.set_index(table_index).sort_index()

    for definition in UnitConverter._unit_defs:
        # a definition reads "<name> = <expression>"; the name is the left-hand side
        unit_name = definition.split(' = ')[0]
        if not hasattr(self.ureg, unit_name):
            self.ureg.define(definition)
        else:
            logging.debug('pint already has unit {}, unit is not being redefined'.format(unit_name))
def convert(self):
    """Convert ``self.values`` to the model's energy unit per model time step."""
    model_energy_unit = cfg.calculation_energy_unit
    model_time_step = cfg.getParam('time_step')

    if self.time_unit is None:
        # if sales is a capacity unit, the model must convert the unit type to
        # an energy unit for conversion
        source_numerator = self.capacity_or_energy_unit + "_" + model_time_step
        source_denominator = model_time_step
    else:
        # if sales has a time_unit, then the unit is energy and must be
        # converted to capacity
        source_numerator = self.capacity_or_energy_unit
        source_denominator = self.time_unit

    self.values = UnitConverter.unit_convert(
        self.values,
        unit_from_num=source_numerator,
        unit_from_den=source_denominator,
        unit_to_num=model_energy_unit,
        unit_to_den=model_time_step)
def standardize_time_across_timezones(self, df):
    """Reindex ``df`` onto the active dates expressed in the dispatch output offset.

    The 'weather_datetime' level is reindexed to ``self.active_dates_index_unique``
    localised to a fixed UTC offset derived from the ``dispatch_outputs_timezone``
    setting as of 2015-01-01.  Gaps created by the reindex are back-filled within
    each group of the remaining index levels, then forward-filled; rows that are
    still null are dropped.  ``df`` must not contain nulls on entry.
    """
    output_zone = pytz.timezone(cfg.getParam('dispatch_outputs_timezone'))
    reference_day = DT.datetime(2015, 1, 1)
    offset_minutes = (output_zone.utcoffset(reference_day) + output_zone.dst(reference_day)).total_seconds() / 60.
    target_index = pd.DatetimeIndex(self.active_dates_index_unique, tz=pytz.FixedOffset(offset_minutes))

    # if we have hydro year, when this does a reindex, it can introduce NaNs,
    # so we want to remove them after
    assert not df.isnull().any().any()

    reindexed = util.reindex_df_level_with_new_elements(df.copy(), 'weather_datetime', target_index)
    other_levels = [name for name in df.index.names if name != 'weather_datetime']
    back_filled = reindexed.groupby(level=other_levels).fillna(method='bfill')
    filled = back_filled.fillna(method='ffill')
    return filled[~filled.isnull().values]
def convert(self):
    """
    Convert values, divided by the input timestep, to model capacity units
    (energy_unit/time_step).  Does nothing when ``self.values`` is None.
    """
    if self.values is None:
        return

    model_energy_unit = cfg.calculation_energy_unit
    model_time_step = cfg.getParam('time_step')

    if self.time_unit is None:
        # no time unit given: build an energy-style unit name from the capacity
        # unit and the model time step
        source_numerator = self.capacity_or_energy_unit + "_" + model_time_step
        source_denominator = model_time_step
    else:
        source_numerator = self.capacity_or_energy_unit
        source_denominator = self.time_unit

    self.values = UnitConverter.unit_convert(
        self.values / self.input_timestep,
        unit_from_num=source_numerator,
        unit_from_den=source_denominator,
        unit_to_num=model_energy_unit,
        unit_to_den=model_time_step)
def convert(self):
    """
    Convert raw_values to model capacity units (energy_unit/time_step) and, for
    'absolute' definitions, to model currency.  Sets ``self.values`` and
    ``self.absolute``.
    """
    model_energy_unit = cfg.calculation_energy_unit
    model_time_step = cfg.getParam('time_step')

    if getattr(self, 'time_unit', None) is not None:
        # if a cost has a time_unit, then the unit is energy and must be
        # converted to capacity
        source_denominator = self.capacity_or_energy_unit
        source_numerator = self.time_unit
    else:
        # if a cost is a capacity unit, the model must convert the unit type to
        # an energy unit for conversion
        source_denominator = self.capacity_or_energy_unit + "_" + model_time_step
        source_numerator = model_time_step

    self.values = UnitConverter.unit_convert(
        self.raw_values,
        unit_from_den=source_denominator,
        unit_from_num=source_numerator,
        unit_to_den=model_energy_unit,
        unit_to_num=model_time_step)

    # only absolute costs carry a currency; other definitions are left as-is
    is_absolute = self.definition == 'absolute'
    if is_absolute:
        self.values = UnitConverter.currency_convert(self.values, self.currency, self.currency_year)
    self.absolute = is_absolute
def min_year(self):
    """calculates the minimum or start year of data in the technology specification.
    Used to determine start year of subsector for analysis.

    Starts from the configured ``current_year`` and lowers it to the smallest
    'vintage' found in the ``raw_values`` index of any attribute of this object
    that has both a ``__dict__`` and a ``raw_values`` attribute.  Attributes
    whose vintages cannot be read (e.g. ``raw_values`` is None or has no
    'vintage' level) are skipped.

    The result is stored on ``self.min_year``; note that this instance attribute
    then shadows this method on the instance.
    """
    self.min_year = cfg.getParam('current_year')
    # Iterate over a snapshot of the attribute names so that attribute access
    # cannot invalidate the iteration by adding entries to the instance dict.
    for att in list(vars(self)):
        obj = getattr(self, att)
        if not (hasattr(obj, '__dict__') and hasattr(obj, 'raw_values')):
            continue
        try:
            vintage_position = util.position_in_index(obj.raw_values, 'vintage')
            att_min_year = min(obj.raw_values.index.levels[vintage_position])
        except Exception:
            # Best effort: this attribute contributes nothing.  Unlike a bare
            # except, KeyboardInterrupt and SystemExit still propagate.
            continue
        if att_min_year < self.min_year:
            self.min_year = att_min_year
def localize_shapes(self, df):
    """
    Localize each 'time zone' group of ``df`` to its own zone, then convert
    everything to the dispatch output zone.

    Every zone is reduced to a fixed UTC offset evaluated on 2015-01-01.  The
    zone of a group is ``self.time_zone`` when that is set, otherwise the
    group's own 'time zone' label.  The combined frame is returned sorted by index.
    """
    reference_day = DT.datetime(2015, 1, 1)

    def fixed_offset(zone_name):
        # get the time zone and figure out the offset from UTC, in minutes
        zone = pytz.timezone(zone_name)
        minutes = (zone.utcoffset(reference_day) + zone.dst(reference_day)).total_seconds() / 60.
        return pytz.FixedOffset(minutes)

    # localize each group in its own zone ...
    localized_groups = [
        group.tz_localize(
            fixed_offset(self.time_zone or format_timezone_str(zone_label)),
            level='weather_datetime')
        for zone_label, group in df.groupby(level='time zone')
    ]

    # ... and then convert to dispatch_outputs_timezone
    output_offset = fixed_offset(cfg.getParam('dispatch_outputs_timezone'))
    converted = pd.concat(localized_groups).tz_convert(output_offset, level='weather_datetime')
    return converted.sort_index()
def convert(self):
    """
    Convert ``self.values`` to model currency and capacity (energy_unit/time_step).

    The currency conversion is applied first, then the unit conversion of the
    cost denominator.
    """
    self.values = UnitConverter.currency_convert(self.values, self.currency, self.currency_year)
    model_energy_unit = cfg.calculation_energy_unit
    model_time_step = cfg.getParam('time_step')
    if self.time_unit is not None:
        # if a cost has a time_unit, then the unit is energy and must be
        # converted to capacity
        self.values = UnitConverter.unit_convert(
            self.values,
            # was `self.self.capacity_or_energy_unit`, which raised AttributeError
            unit_from_den=self.capacity_or_energy_unit,
            unit_from_num=self.time_unit,
            unit_to_den=model_energy_unit,
            unit_to_num=model_time_step)
    else:
        # capacity unit: build an energy-style unit name from the capacity unit
        # and the model time step; the numerator units are left at their defaults
        self.values = UnitConverter.unit_convert(
            self.values,
            unit_from_den=self.capacity_or_energy_unit + "_" + model_time_step,
            unit_to_den=model_energy_unit)
def main():
    """Train an XGBoost binary classifier and write test predictions to data/result.csv.

    Command line arguments:
        sys.argv[1]: number of boosting rounds.
        sys.argv[2]: tag inserted into the names of the feature map, model dump
            and saved model files.

    Steps: load the train/test CSVs named in the config, drop the configured
    columns, fill NAs, transform date columns, one-hot encode with a
    DictVectorizer, report train/validation AUC on a single shuffled 5-fold
    split, then retrain on the full training set, dump/save the model and write
    the test predictions.
    """
    # Read the command line first so a missing argument fails before any data is loaded.
    num_round = int(sys.argv[1])
    run_tag = str(sys.argv[2])
    fmap_path = 'xgb' + run_tag + '.fmap'

    #Get param
    config = getParam()
    ID = config['ID']
    label = config['label']

    #Load data
    train = pd.read_csv('data/' + str(config['train_file']), encoding='u8')
    test = pd.read_csv('data/' + str(config['test_file']), encoding='u8')

    #Drop features
    drop_list = config['drop_list']
    df_train = train.drop(drop_list, axis=1)
    df_test = test.drop(drop_list, axis=1)

    #Assign feature and label
    selected_cols = df_train.columns[(df_train.columns != ID) & (df_train.columns != label)]
    X_train, X_test = df_train.loc[:, selected_cols], df_test.loc[:, selected_cols]
    Y_train, Y_test = df_train[label], df_test[label]

    #Fillna
    X_train = naTransformer(X_train)
    X_test = naTransformer(X_test)

    #Deal with Date feature
    date_cols = config['date_cols']
    X_train = dateTransformer(X_train, date_cols)
    X_test = dateTransformer(X_test, date_cols)

    #Deal with Category Feature
    vectorizer = DictVectorizer()
    vec_train = vectorizer.fit_transform(X_train.to_dict(orient='records'))
    vec_test = vectorizer.transform(X_test.to_dict(orient='records'))

    #Store the feature names
    feature_map(fmap_path, vectorizer.feature_names_)

    # Kept separate from the config dict above so neither overwrites the other.
    booster_params = {
        'max_depth': 4,
        'eta': 0.1,
        'silent': 1,
        'objective': 'binary:logistic',
        'nthread': 29,
        'eval_metric': 'auc'
    }

    #Validation: only the first fold of a shuffled 5-fold split is evaluated
    kf = KFold(n_splits=5, shuffle=True)
    train_index, val_index = next(kf.split(Y_train))
    # the fold indices are positional, so select labels by position
    dtrain = xgb.DMatrix(vec_train[train_index, :], label=Y_train.iloc[train_index])
    dval = xgb.DMatrix(vec_train[val_index, :], label=Y_train.iloc[val_index])
    evallist = [(dval, 'val'), (dtrain, 'train')]
    xgb.train(booster_params, dtrain, num_round, evallist)

    #Final model on the full training set
    dtrain = xgb.DMatrix(vec_train, label=Y_train)
    dtest = xgb.DMatrix(vec_test, label=Y_test)
    bst = xgb.train(booster_params, dtrain, num_round)

    #dump model
    bst.dump_model('xgb' + run_tag + '.dump', fmap=fmap_path, with_stats=True)
    bst.save_model('test' + run_tag + '.model')
    # Use the feature map written above (previously the hard-coded 'xgb.fmap',
    # which is a different file whenever a tag is given).  The scores are not
    # used further; they are computed for inspection only.
    feature_score = bst.get_score(fmap=fmap_path, importance_type='gain')

    output_test = pd.DataFrame()
    output_test['XGB'] = bst.predict(dtest)
    output_test['ID'] = test[ID].astype(str)
    output_test['Label'] = test[label]
    output_test[['ID', 'XGB', 'Label']].to_csv('data/result.csv', index=False, encoding='u8')