def calculate(country, parallel=True, save=True):
    """
    Base function to perform the sensitivity analysis for a country.

    Arguments:
        *country* (string) -- ISO2 code of country to consider.

        *parallel* (bool) -- calculates all regions within a country in
        parallel. Set to False if you have little capacity on the machine
        (default: **True**).

        *save* (bool) -- boolean to decide whether you want to save the output
        to a csv file (default: **True**).

    Returns:
        *dataframe* -- Pandas dataframe with all outcomes per parameter
        combination.
    """
    # get data path
    data_path = load_config()['paths']['data']

    # make sure the country inserted is an ISO2 country name for the remainder of the analysis
    country = coco.convert(names=country, to='ISO2')

    # create country poly files
    poly_files(data_path, country)

    # download OSM file if it is not there yet
    download_osm_file(country)

    samples, storm_list = prepare_sens_analysis()

    # get list of regions for which we have poly files (should be all)
    regions = os.listdir(os.path.join(data_path, country, 'NUTS3_POLY'))
    regions = [x.split('.')[0] for x in regions]

    if parallel:
        samples = len(regions) * [samples]
        storms = len(regions) * [storm_list]
        save = len(regions) * [save]

        with Pool(cpu_count() - 2) as pool:
            country_table = pool.starmap(region_sens_analysis,
                                         zip(regions, samples, storms, save),
                                         chunksize=1)
    else:
        country_table = []
        for region in regions:
            country_table.append(
                region_sens_analysis(region, samples, storm_list, save))

    return country_table
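# Illustrative usage sketch (not from the source): run the sensitivity
# analysis serially on a small machine. 'LU' is just an example ISO2 code and
# assumes the data paths configured in load_config() are populated.
results = calculate('LU', parallel=False, save=True)
print('sensitivity analysis finished for {} regions'.format(len(results)))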
def make_transfers_section():
    margin_style = {"margin-top": "1rem", "margin-bottom": "2rem"}
    config = load_config()
    data_loader = DataLoader(config)
    df_league = data_loader.get_league_standings()
    manager_ids = df_league["entry_id"].unique().tolist()
    manager_names = df_league["entry_name"].unique().tolist()
    manager_options = [{
        'label': manager,
        'value': manager_id
    } for manager, manager_id in zip(manager_names, manager_ids)]
    dropdown_manager = make_dropdown('manager-selection-transfers',
                                     manager_options,
                                     placeholder="Select Manager ...")
    num_transfers = [1, 2, 3, 4]
    transfer_options = [{'label': num, 'value': num} for num in num_transfers]
    dropdown_num_transfers = make_dropdown(
        'transfer-selection-numbers',
        transfer_options,
        placeholder="Select number of transfers ...")
    dropdown_section = html.Div(children=[
        html.Div(dropdown_manager, className='col-6'),
        html.Div(dropdown_num_transfers, className='col-6'),
    ], className='row')
    section = html.Div(children=[
        html.Div("Transfer Suggestion", className='subtitle inline-header'),
        dropdown_section,
        html.Div(make_button("Submit", 'transfer-optimization-btn')),
        dcc.Loading(html.Div(id='transfer-suggestion-output',
                             style=margin_style),
                    color='black')
    ])
    return section
def extract_buildings(area, country, NUTS3=True):
    """Extracts buildings from an OpenStreetMap pbf file and saves them to an
    ESRI shapefile.

    Arguments:
        *area* (string) -- name of area to clip

        *country* (string) -- ISO2 code of country to consider.

        *NUTS3* (bool) -- specify whether it will be a clip of a NUTS3 region
        or the whole country (default: **True**)
    """
    # get data path
    data_path = load_config()['paths']['data']

    wgs = os.path.join(data_path, country, 'NUTS3_BUILDINGS',
                       '{}_buildings.shp'.format(area))

    if NUTS3:
        pbf = os.path.join(data_path, country, 'NUTS3_OSM',
                           '{}.osm.pbf'.format(area))
    else:
        pbf = os.path.join(data_path, 'OSM', '{}.osm.pbf'.format(area))

    os.system('ogr2ogr -progress -f "ESRI shapefile" {} {} -sql "select \
        building,amenity from multipolygons where building is not null" \
        -lco ENCODING=UTF-8 -nlt POLYGON -skipfailures'.format(wgs, pbf))
def convert_buildings(area, country):
    """Converts the coordinate system from EPSG:4326 to EPSG:3035.

    Arguments:
        *area* (string) -- name of area (most often NUTS3) for which buildings
        should be converted to the European coordinate system.

        *country* (string) -- ISO2 code of country to consider.

    Returns:
        *GeoDataframe* -- Geopandas dataframe with all buildings of the
        selected area
    """
    # get data path
    data_path = load_config()['paths']['data']

    # path to area with buildings
    etrs = os.path.join(data_path, country, 'NUTS3_BUILDINGS',
                        '{}_buildings.shp'.format(area))

    # load data and reproject to EPSG:3035
    input_ = gpd.read_file(etrs)
    input_ = input_.to_crs(epsg=3035)

    return input_
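# Usage sketch for the extract-and-convert pipeline ('LU000' as an example
# NUTS3 region): extract_buildings writes the shapefile, convert_buildings
# reads it back in EPSG:3035 so building areas come out in square metres.
extract_buildings('LU000', 'LU', NUTS3=True)
buildings = convert_buildings('LU000', 'LU')
print('total footprint: {:.0f} m2'.format(buildings.geometry.area.sum()))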
def risk(country, save=True, parallel=True):
    """
    Estimate risk based on the event set.

    Arguments:
        *country* (string) -- ISO2 code of country to consider.

        *save* (bool) -- boolean to decide whether you want to save the output
        to a csv file (default: **True**).

        *parallel* (bool) -- calculates all regions within a country in
        parallel. Set to False if you have little capacity on the machine
        (default: **True**).

    Returns:
        *GeoDataframe* -- Geopandas dataframe with the risk per building.
    """
    # get data path
    data_path = load_config()['paths']['data']

    gdf_buildings = losses(country, parallel=parallel, event_set=True,
                           save=True)

    if save:
        gdf_buildings.drop(['centroid'], axis='columns', inplace=True)
        gdf_buildings.to_file(
            os.path.join(data_path, 'output_risk',
                         'risk_{}.shp'.format(country)))

    return gdf_buildings
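# Usage sketch: estimate event-set risk for one country and write the
# shapefile output (assumes the event-set rasters are on disk):
gdf_risk = risk('LU', save=True, parallel=True)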
def make_league_search_section():
    config = load_config()
    league_ids = config["leagues"]
    league_options = [{'label': this_id, 'value': this_id}
                      for this_id in league_ids]
    dropdown_section = make_dropdown('league-search-dropdown',
                                     league_options,
                                     placeholder="Select League ID ...")
    return dropdown_section
def region_sens_analysis(region, samples, sens_analysis_storms=[], save=True):
    """Perform a sensitivity analysis for the specified region, based on a
    predefined list of storms.

    Arguments:
        *region* (string) -- nuts code of region to consider.

        *samples* (list) -- list of tuples, where each tuple is a **unique**
        set of parameter values.

        *sens_analysis_storms* (list) -- if empty, it will fill with the
        default list

        *save* (bool) -- boolean to decide whether you want to save the output
        to a csv file (default: **True**)

    Returns:
        *list* -- list with the total losses per storm for all parameter
        combinations
    """
    data_path = load_config()['paths']['data']
    country = region[:2]

    # select storms to assess
    if len(sens_analysis_storms) == 0:
        sens_analysis_storms = [
            '19991203', '19900125', '20090124', '20070118', '19991226'
        ]

    storm_list = sens_analysis_storms
    all_combis = list(product(samples, storm_list))

    # load maximum damages
    max_dam = load_max_dam(data_path)

    # load damage curves
    curves = load_curves(data_path)

    # get exposure table
    output_table = region_exposure(region,
                                   include_storms=True,
                                   event_set=False,
                                   sens_analysis_storms=storm_list,
                                   save=True)

    # calculate losses for all combinations
    output_file = pd.DataFrame(index=list(range(len(samples))),
                               columns=sens_analysis_storms)
    for iter_, (sample, storm) in enumerate(all_combis):
        output_file.loc[iter_, storm] = list(
            loss_calculation(storm, country, output_table, max_dam, curves,
                             sample).sum())[0]

    if save:
        output_file.to_csv(
            os.path.join(data_path, 'output_sens',
                         '{}_sens_analysis'.format(region)))

    return output_file
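# Hedged sketch of what the 'samples' argument could look like: the problem
# definition used for the delta analysis elsewhere in this codebase (see
# read_outcomes_sens_analysis) has five parameters, so a Latin-hypercube
# sample of tuples can be drawn with SALib. prepare_sens_analysis presumably
# does something similar; this is an assumption, not its actual source.
from SALib.sample import latin

problem = {'num_vars': 5,
           'names': ['c2', 'c3', 'c4', 'lu1', 'lu2'],
           'bounds': [[0, 100], [0, 100], [0, 100], [0, 50], [0, 50]]}
samples = [tuple(row) for row in latin.sample(problem, 100)]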
def query_league_history_data(league_id):
    config = load_config()
    data_scraper = DataScraper(config)
    data_processor = DataProcessor(config)
    data_loader = DataLoader(config)
    data_processor.save_classic_league_history(league_id)
    df = data_loader.get_league_gw_history(league_id)
    return df
def get_top_eo():
    config = load_config()
    data_loader = DataLoader(config)
    df_top = data_loader.get_top_manager_picks()
    # each manager contributes 15 picks, so this is the number of managers
    n_players = int(len(df_top) / 15.0)
    df_stats = df_top.groupby('element')["multiplier"].agg("sum").reset_index()
    df_stats["Top EO"] = df_stats["multiplier"] * 100.0 / n_players
    df_stats["Top EO"] = df_stats["Top EO"].round(2)
    df_stats = df_stats[["element", "Top EO"]].copy()
    return df_stats
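# Worked sketch of the effective-ownership formula above, on toy data: the
# multiplier is 0 for benched players, 1 for starters and 2 for the captain,
# so summing it per player and dividing by the number of managers gives EO in %.
import pandas as pd

toy = pd.DataFrame({'element': [10, 10, 10, 42],
                    'multiplier': [2, 1, 1, 0]})  # picks from three managers
n_managers = 3
eo = (toy.groupby('element')['multiplier'].sum() * 100.0 / n_managers).round(2)
print(eo)  # element 10 -> 133.33 (captained once), element 42 -> 0.0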
def loss_per_country(figure_output_path='test_country.png'):
    '''This function is used to plot the total losses per year per country.

    Arguments:
        *figure_output_path* (string) -- path to location where you want to
        save the figure

    Returns:
        *A saved figure*
    '''
    data_path = load_config()['paths']['data']

    countries = [
        'AT', 'BE', 'DK', 'FR', 'DE', 'IE', 'LU', 'NL', 'NO', 'SE', 'UK',
        'PL', 'IT', 'FI'
    ]
    country_names = [
        'Austria', 'Belgium', 'Denmark', 'France', 'Germany', 'Ireland',
        'Luxembourg', 'Netherlands', 'Norway', 'Sweden', 'United Kingdom',
        'Poland', 'Italy', 'Finland'
    ]
    cols_to_load = ['Storm'] + countries

    all_storm = pd.read_excel(os.path.join(data_path, 'output_storms.xlsx'),
                              sheet_name='total_losses')
    all_storm = all_storm[cols_to_load]
    all_storm['Storm'] = pd.to_datetime(all_storm['Storm'])
    all_storm.set_index('Storm', inplace=True)
    all_storm.rename(columns=dict(zip(countries, country_names)),
                     inplace=True)

    # aggregate the storm losses to annual totals
    loss_per_year = all_storm.resample("A").sum()
    loss_per_year['Year'] = loss_per_year.index.year
    loss_per_year.set_index('Year', inplace=True)

    fig, ax_yc = plt.subplots(figsize=(10, 8))
    loss_per_year.plot.bar(ax=ax_yc, stacked=True, width=0.9, ec="w", lw=0.1,
                           colormap="Paired")
    plt.setp(ax_yc.get_xticklabels(), rotation=80)
    ax_yc.set_xlabel("Years", fontweight='bold')
    ax_yc.set_ylabel("Loss in million dollar", fontweight='bold')
    ax_yc.set_ylim(0, 40000)
    ax_yc.legend(loc='upper right', frameon=True, prop={'size': 12})
    ax_yc.patch.set_facecolor('0.98')

    # and save the figure
    plt.savefig(figure_output_path, dpi=600, bbox_inches='tight')
def get_league_eo(league_id):
    config = load_config()
    data_loader = DataLoader(config)
    print(league_id)
    df_league = data_loader.get_league_standings(league_id)
    managers = df_league["entry_id"].unique().tolist()
    dfs = []
    for manager in managers:
        df = pd.DataFrame(data_loader.get_manager_current_gw_picks(manager))
        dfs.append(df)
    df_eo = pd.concat(dfs)
    # each manager contributes 15 picks, so this is the number of managers
    n_players = int(len(df_eo) / 15.0)
    df_stats = df_eo.groupby('element')["multiplier"].agg("sum").reset_index()
    df_stats["League EO"] = df_stats["multiplier"] * 100.0 / n_players
    df_stats["League EO"] = df_stats["League EO"].round(2)
    df_stats = df_stats[["element", "League EO"]].copy()
    return df_stats
def load_leads_current_gw():
    config = load_config()
    data_loader = DataLoader(config)
    current_gw = data_loader.get_next_gameweek_id() - 1
    print(current_gw)
    df_leads = load_leads(current_gw)
    print(df_leads)
    try:
        print(df_leads.head())
    except Exception:
        # no scores available yet: fall back to a placeholder frame
        df_leads = pd.DataFrame()
        df_leads["player_id"] = [-1, -1]
        df_leads["LGBM Point"] = [-1, -1]
        df_leads["Fast Point"] = [-1, -1]
    df_leads['xP'] = (df_leads["LGBM Point"] + df_leads["Fast Point"]) / 2.0
    df_leads['xP'] = df_leads['xP'].round(2)
    return df_leads
def load_sens_analysis_storms(storm_name_list=[
        '19991203', '19900125', '20090124', '20070118', '19991226']):
    """
    This function loads the storms used to perform the sensitivity analysis.

    Arguments:
        *storm_name_list* (list) -- list of storms to include in the
        sensitivity analysis. The default storms are **Anatol**, **Daria**,
        **Klaus**, **Kyrill** and **Lothar**.

    Returns:
        *storm_list* (list) -- same list of storms but now with full paths to
        their location in the data directory.
    """
    data_path = load_config()['paths']['data']
    storm_list = []
    for root, dirs, files in os.walk(os.path.join(data_path, 'STORMS')):
        for file in files:
            for storm in storm_name_list:
                if storm in file:
                    storm_list.append(os.path.join(data_path, 'STORMS', file))
    return storm_list
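# Usage sketch: resolve the default five storms to their raster paths
# (assumes the STORMS directory under the configured data path is populated):
for storm_path in load_sens_analysis_storms():
    print(storm_path)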
def risk_map(figure_output_path='test_risk_map.png'):
    """This function is used to create a map with the total risk per region.

    Arguments:
        *figure_output_path* (string) -- path to location where you want to
        save the figure

    Returns:
        *A saved figure*
    """
    data_path = load_config()['paths']['data']

    countries = [
        'LU', 'AT', 'BE', 'DK', 'FR', 'DE', 'IE', 'NL', 'NO', 'SE', 'UK',
        'PL', 'IT', 'FI', 'CH', 'EE', 'LV', 'LT', 'PT', 'ES', 'CZ'
    ]

    NUTS3 = gpd.read_file(
        os.path.join(data_path, 'input_data', 'NUTS3_ETRS.shp'))
    NUTS3 = NUTS3.to_crs(epsg=4326)
    NUTS3 = NUTS3[NUTS3.STAT_LEVL_ == 3]
    NUTS3['Sum'] = 0

    # NOTE: hardcoded local path, left as in the original
    risk_path = r"F:\Dropbox\VU_DATA\WISC"

    for country in countries:
        output_table = pd.DataFrame()
        for root, dirs, files in os.walk(
                os.path.join(risk_path, "output_risk", country)):
            for file in files:
                nuts_name = file[:-9]
                output_table = pd.DataFrame(
                    pd.read_csv(os.path.join(risk_path, "output_risk",
                                             country, file),
                                index_col=0,
                                encoding='cp1252')['Risk'])
                output_table['Risk'] = output_table['Risk'].astype(float)
                output_table = output_table.replace(
                    [np.inf, -np.inf], np.nan).dropna(how='all')
                output_table.loc[output_table.Risk < 0.5] = 0

                # TOTAL
                NUTS3.loc[NUTS3.NUTS_ID == nuts_name, 'Sum'] = \
                    output_table['Risk'].sum(axis=0) / 1000000

    NUTS3 = NUTS3[NUTS3.Sum > 0]
    NUTS3.to_file(os.path.join(data_path, "NUTS3.shp"))

    fig, ax1 = plt.subplots(figsize=(10, 20))

    # let's create a basemap of Europe
    x1, x2, y1, y2 = -18., 38., 33., 71.
    m = Basemap(resolution='i',
                projection='merc',
                llcrnrlat=y1,
                urcrnrlat=y2,
                llcrnrlon=x1,
                urcrnrlon=x2,
                lat_ts=(x1 + x2) / 2)
    m.drawcountries(linewidth=0.5)
    m.drawcoastlines(linewidth=0.5)
    m.drawmapboundary(fill_color='#46bcec')
    m.fillcontinents(color='white', lake_color='#46bcec')
    m.readshapefile(os.path.join(data_path, "NUTS3"), 'nuts3')

    cmap = plt.get_cmap('OrRd')

    # make a color map of fixed colors
    bounds = [0.05, 1, 5, 10, 50, 100, 500, 1000]
    norm = colors.BoundaryNorm(bounds, cmap.N)

    # add values
    df_poly = pd.DataFrame({
        'shapes': [Polygon(np.array(shape), True) for shape in m.nuts3],
        'area': [info['NUTS_ID'] for info in m.nuts3_info],
        'value_': [info['Sum'] for info in m.nuts3_info]
    })

    pc1 = PatchCollection(df_poly.shapes,
                          edgecolor='k',
                          linewidths=0.1,
                          cmap=cmap,
                          zorder=2)
    pc1.set_facecolor(cmap(norm(df_poly['value_'].fillna(0).values)))
    ax1.add_collection(pc1)

    # ADD COLORBAR
    mapper = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap)
    mapper.set_array(df_poly['value_'])
    fig.patch.set_facecolor('white')
    divider = make_axes_locatable(ax1)
    cax = divider.new_vertical(size="5%", pad=0.2, pack_start=True)
    fig.add_axes(cax)
    cbar = fig.colorbar(mapper, cax=cax, orientation="horizontal")
    cbar.set_label('Risk in million Dollar (2012)', rotation=0, fontsize=14)

    fig.savefig(figure_output_path, dpi=600, bbox_inches='tight')
def query_next_gameweek():
    config = load_config()
    data_loader = DataLoader(config)
    next_gw = int(data_loader.get_next_gameweek_id())
    return next_gw
""" Created on Mon Sep 17 19:37:33 2018 @author: cenv0574 """ import os import sys import country_converter as coco cc = coco.CountryConverter() # make connection to all the scripts sys.path.append(os.path.join('..')) from scripts.utils import load_config, create_folder_structure from scripts.analyze import losses if __name__ == '__main__': # make connection to the data paths data_path = load_config()['paths']['data'] storms_path = load_config()['paths']['hazard_data'] # set country country = 'IE' #set folder structure for calculation create_folder_structure(data_path, country) # and estimate losses losses(country, parallel=True, event_set=False, save=True)
def read_outcomes_sens_analysis():
    """
    Function to write the output of the sensitivity analysis to figures.
    """
    # load some basics
    data_path = load_config()['paths']['data']

    # specify countries
    countries = [
        'LU', 'CZ', 'CH', 'EE', 'LV', 'LT', 'PT', 'ES', 'AT', 'BE', 'DK',
        'IE', 'NL', 'NO', 'SE'
    ]
    country_full_names = {
        'CZ': 'Czech Republic', 'CH': 'Switzerland', 'EE': 'Estonia',
        'LV': 'Latvia', 'LT': 'Lithuania', 'PT': 'Portugal', 'ES': 'Spain',
        'AT': 'Austria', 'BE': 'Belgium', 'DK': 'Denmark',
        'LU': 'Luxembourg', 'NL': 'Netherlands', 'IE': 'Ireland',
        'UK': 'United Kingdom', 'NO': 'Norway', 'SE': 'Sweden'
    }
    storms = {
        '19991203': 'Anatol', '19900125': 'Daria', '20090124': 'Klaus',
        '20070118': 'Kyrill', '19991226': 'Lothar'
    }

    # set parameters for sensitivity analysis
    problem = {
        'num_vars': 5,
        'names': ['c2', 'c3', 'c4', 'lu1', 'lu2'],
        'bounds': [[0, 100], [0, 100], [0, 100], [0, 50], [0, 50]]
    }

    # select storms to assess
    storm_name_list = [
        '19991203', '19900125', '20090124', '20070118', '19991226'
    ]
    storm_list = []
    for root, dirs, files in os.walk(os.path.join(data_path, 'STORMS')):
        for file in files:
            for storm in storm_name_list:
                if storm in file:
                    storm_list.append(os.path.join(data_path, 'STORMS', file))

    for country in countries:
        dirlist = os.listdir(os.path.join(data_path, 'output_sens'))
        country_list = [x for x in dirlist if country in x]
        k = 0
        for i in range(int(len(country_list) / 2)):
            if i < 1:
                out = pd.read_csv(os.path.join(data_path, 'output_sens',
                                               country_list[k]),
                                  index_col=0)
            else:
                out2 = pd.read_csv(os.path.join(data_path, 'output_sens',
                                                country_list[k]),
                                   index_col=0).fillna(0)
                out += out2
            k += 2

        param_values = pd.read_csv(os.path.join(data_path, 'output_sens',
                                                country_list[1]),
                                   delim_whitespace=True,
                                   header=None)

        # estimate outcome of the sensitivity analysis
        param_values = np.asarray(param_values)
        for l in range(5):
            try:
                storm = np.asarray(out.iloc[:, l])
                Si = delta.analyze(problem, param_values, storm,
                                   print_to_console=True)

                # create histogram
                plt.hist(storm, bins='auto', ec="k", lw=0.1)
                plt.autoscale(tight=True)
                plt.title(country_full_names[country] + ', ' +
                          storms[out.columns[l]])
                plt.ylabel('Frequency')
                plt.xlabel('Total damage in Million Euro')
                plt.savefig(os.path.join(
                    data_path, 'Figures',
                    country + '_' + storms[out.columns[l]] + '.png'),
                            dpi=300)
                plt.clf()

                # create pie chart
                delta_ = (Si['delta']) / sum(Si['delta']) * 100
                pie_colors = [
                    'yellowgreen', 'gold', 'lightskyblue', 'lightcoral',
                    'peru'
                ]
                labels = ['c2', 'c3', 'c4', 'lu1', 'lu2']
                patches, texts = plt.pie(delta_,
                                         colors=pie_colors,
                                         startangle=90,
                                         radius=0.4,
                                         center=(0.5, 0.5))
                plt.axis('equal')
                plt.legend(patches,
                           loc="best",
                           labels=[
                               '%s : %1.1f%%' % (lab, s)
                               for lab, s in zip(labels, delta_)
                           ])
                plt.title(country_full_names[country] + ', ' +
                          storms[out.columns[l]])
                plt.savefig(os.path.join(
                    data_path, 'Figures',
                    country + '_' + storms[out.columns[l]] + '_SA.png'),
                            dpi=300)
                plt.clf()
            except Exception:
                continue
def summary_statistics_losses():
    """
    This function creates the file 'output_storms.xlsx'. This file is required
    to create the summary figures.

    Returns:
        *output_storms.xlsx* (excel file) -- Excel file with summary outcomes
    """
    data_path = load_config()['paths']['data']

    countries = [
        'AT', 'BE', 'DK', 'FR', 'DE', 'IE', 'LU', 'NL', 'NO', 'SE', 'UK',
        'PL', 'IT', 'FI'
    ]

    # infer the storm columns from one of the regional loss files
    first_line = pd.read_csv(os.path.join(data_path, 'output_losses', 'LU',
                                          'LU000_losses.csv'),
                             nrows=1)
    extract = first_line.columns.tolist()[2:]
    storm_name_list = extract[8:]

    output_storms = pd.DataFrame(np.zeros(
        (len(storm_name_list), len(countries))),
                                 index=storm_name_list,
                                 columns=countries)
    output_storms_res = pd.DataFrame(np.zeros(
        (len(storm_name_list), len(countries))),
                                     index=storm_name_list,
                                     columns=countries)
    output_storms_ind_com = pd.DataFrame(np.zeros(
        (len(storm_name_list), len(countries))),
                                         index=storm_name_list,
                                         columns=countries)
    output_storms_transport = pd.DataFrame(np.zeros(
        (len(storm_name_list), len(countries))),
                                           index=storm_name_list,
                                           columns=countries)
    output_storms_other = pd.DataFrame(np.zeros(
        (len(storm_name_list), len(countries))),
                                       index=storm_name_list,
                                       columns=countries)
    output_storms_agri = pd.DataFrame(np.zeros(
        (len(storm_name_list), len(countries))),
                                      index=storm_name_list,
                                      columns=countries)

    for country in countries:
        output_table = pd.DataFrame()
        for root, dirs, files in os.walk(
                os.path.join(data_path, 'output_losses', country)):
            for file in files:
                output_table = pd.read_csv(os.path.join(
                    data_path, 'output_losses', country, file),
                                           usecols=extract)
                output_table = output_table.replace(
                    [np.inf, -np.inf], np.nan).dropna(how='all')
                output_table = output_table.reset_index(inplace=False)

                # TOTAL
                output_storms[country] += (
                    output_table[storm_name_list].sum(axis=0) / 1000000)

                # RESIDENTIAL
                res = output_table[output_table.CLC_2012 < 3]
                output_storms_res[country] += (
                    res[storm_name_list].sum(axis=0) / 1000000)

                # COM/IND
                ind_com = output_table[output_table.CLC_2012 == 3]
                output_storms_ind_com[country] += (
                    ind_com[storm_name_list].sum(axis=0) / 1000000)

                # TRANSPORT, PORTS, AIRPORTS
                transport = output_table.loc[np.where(
                    output_table['CLC_2012'].between(4, 6,
                                                     inclusive=True))[0]]
                output_storms_transport[country] += (
                    transport[storm_name_list].sum(axis=0) / 1000000)

                # OTHER BUILT-UP
                other = output_table.loc[np.where(
                    output_table['CLC_2012'].between(7, 12,
                                                     inclusive=True))[0]]
                output_storms_other[country] += (
                    other[storm_name_list].sum(axis=0) / 1000000)

                # AGRICULTURAL BUILDINGS
                agri = output_table[output_table.CLC_2012 > 12]
                output_storms_agri[country] += (
                    agri[storm_name_list].sum(axis=0) / 1000000)

    output_storms['Sum'] = output_storms.sum(axis=1)
    output_storms_res['Sum'] = output_storms_res.sum(axis=1)
    output_storms_ind_com['Sum'] = output_storms_ind_com.sum(axis=1)
    output_storms_transport['Sum'] = output_storms_transport.sum(axis=1)
    output_storms_other['Sum'] = output_storms_other.sum(axis=1)
    output_storms_agri['Sum'] = output_storms_agri.sum(axis=1)

    out = pd.ExcelWriter(os.path.join(data_path, 'output_storms.xlsx'))
    output_storms.to_excel(out, sheet_name='total_losses',
                           index_label='Storm')
    output_storms_res.to_excel(out, sheet_name='res_losses',
                               index_label='Storm')
    output_storms_ind_com.to_excel(out, sheet_name='ind_com_losses',
                                   index_label='Storm')
    output_storms_transport.to_excel(out, sheet_name='transport_losses',
                                     index_label='Storm')
    output_storms_other.to_excel(out, sheet_name='other_losses',
                                 index_label='Storm')
    output_storms_agri.to_excel(out, sheet_name='agri_losses',
                                index_label='Storm')
    out.save()
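# Usage sketch: build output_storms.xlsx once, then draw the summary figures
# that read from it (loss_per_country and loss_per_sector are defined in this
# codebase):
summary_statistics_losses()
loss_per_country('losses_per_country.png')
loss_per_sector('losses_per_sector.png')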
def query_league_data(league_id):
    config = load_config()
    data_loader = DataLoader(config)
    df = data_loader.get_league_standings(league_id)
    return df
def query_manager_current_gw_picks(manager_id, league_id):
    config = load_config()
    data_loader = DataLoader(config)
    data = data_loader.get_manager_current_gw_picks(manager_id)
    df = pd.DataFrame(data)

    data_maker = ModelDataMaker(CONFIG_2020)
    player_id_team_id_map = data_maker.get_player_id_team_id_map()
    player_id_player_name_map = data_maker.get_player_id_player_name_map()
    player_id_player_position_map = data_maker.get_player_id_player_position_map()
    team_id_team_name_map = data_maker.get_team_id_team_name_map()
    player_id_cost_map = data_maker.get_player_id_cost_map()
    player_id_selection_map = data_maker.get_player_id_selection_map()

    # points
    df_gw = data_loader.get_live_gameweek_data()
    df_gw = df_gw.rename(columns={"id": "element", "event_points": "Points"})
    df_gw = df_gw[["element", "Points"]].copy()
    df_gw = df_gw.drop_duplicates(subset=["element"])
    df = pd.merge(df, df_gw, how='left', on="element")

    df["Player"] = df["element"].apply(
        lambda x: player_id_player_name_map.get(x, x))
    df["Player"] = df["Player"].apply(lambda x: " ".join(x.split(" ")[:2]))
    df["Team"] = df["element"].apply(
        lambda x: team_id_team_name_map[player_id_team_id_map[x]])
    df["Position"] = df["element"].apply(
        lambda x: player_id_player_position_map.get(x, x))
    # flag (vice-)captains in the player name
    df["Player"] = df[["Player", "is_captain"]].apply(
        lambda x: x[0] + " (C)" if x[1] else x[0], axis=1)
    df["Player"] = df[["Player", "is_vice_captain"]].apply(
        lambda x: x[0] + " (VC)" if x[1] else x[0], axis=1)
    df["Cost"] = df["element"].apply(lambda x: player_id_cost_map.get(x, x))
    df["Cost"] = df["Cost"] / 10
    df["TSB"] = df["element"].apply(
        lambda x: player_id_selection_map.get(x, x))

    # get effective ownership
    df_stats = get_top_eo()
    df_league_eo = get_league_eo(league_id)
    df = pd.merge(df, df_stats, on="element", how="left")
    df = pd.merge(df, df_league_eo, on="element", how="left")

    df_leads = load_leads_current_gw()
    df_leads = df_leads[["player_id", "xP"]].copy()
    df = pd.merge(df, df_leads, how='left', left_on="element",
                  right_on="player_id")

    # order by position, starters before bench
    position_map = {"GK": 1, "DEF": 2, "MID": 3, "FWD": 4}
    df["pos"] = df["Position"].apply(lambda x: position_map[x])
    df = df.sort_values(by=["pos"])
    df_xi = df[df["multiplier"] > 0].copy()
    df_bench = df[df["multiplier"] == 0].copy()
    df = pd.concat([df_xi, df_bench])

    keep_cols = [
        "Player", "multiplier", "Team", "Position", "Top EO", "League EO",
        "xP", "Points"
    ]
    df = df[keep_cols].copy()
    return df
def region_exposure(region,
                    include_storms=True,
                    event_set=False,
                    sens_analysis_storms=[],
                    save=True):
    """Create a GeoDataframe with exposure and hazard information for each
    building in the specified region.

    Arguments:
        *region* (string) -- NUTS3 code of region to consider.

        *include_storms* (bool) -- if set to False, it will only return a list
        of buildings and their characteristics (default: **True**)

        *event_set* (bool) -- if set to True, we will calculate the exposure
        for the event set instead of the historical storms
        (default: **False**)

        *sens_analysis_storms* (list) -- if empty, it will fill with the
        default list

        *save* (bool) -- boolean to decide whether you want to save the output
        to a csv file (default: **True**)

    Returns:
        *GeoDataFrame* with all **hazard** and **exposure** values.
    """
    country = region[:2]
    data_path = load_config()['paths']['data']

    osm_path = os.path.join(data_path, 'OSM', '{}.osm.pbf'.format(country))

    area_poly = os.path.join(data_path, country, 'NUTS3_POLY',
                             '{}.poly'.format(region))
    area_pbf = os.path.join(data_path, country, 'NUTS3_OSM',
                            '{}.osm.pbf'.format(region))

    # the Northern Ireland regions are clipped from the Irish OSM extract
    if region in ('UKN01', 'UKN02', 'UKN03', 'UKN04', 'UKN05'):
        osm_path = os.path.join(data_path, 'OSM', 'IE.osm.pbf')

    clip_osm(data_path, osm_path, area_poly, area_pbf)

    gdf_table = fetch_buildings(data_path, country, region, regional=True)

    print('Fetched all buildings from osm data for {}'.format(region))

    # convert to European coordinate system for overlap
    gdf_table = gdf_table.to_crs(epsg=3035)
    print(len(gdf_table))

    # specify country
    gdf_table["COUNTRY"] = country

    # give unique id
    gdf_table['ID_'] = [str(x) + '_' + region for x in gdf_table.index]

    # calculate area
    gdf_table["AREA_m2"] = gdf_table.geometry.area

    # determine centroid
    gdf_table["centroid"] = gdf_table.geometry.centroid

    nuts_eu = gpd.read_file(
        os.path.join(data_path, 'input_data', 'NUTS3_ETRS.shp'))
    nuts_eu.loc[nuts_eu['NUTS_ID'] == region].to_file(
        os.path.join(data_path, country, 'NUTS3_SHAPE',
                     '{}.shp'.format(region)))

    # create geometry envelope outline for rasterstats; use a buffer to make
    # sure all buildings are in there
    geoms = [
        mapping(nuts_eu.loc[nuts_eu['NUTS_ID'] ==
                            region].geometry.envelope.buffer(10000).values[0])
    ]

    # get land use values
    with rio.open(os.path.join(data_path, 'input_data',
                               'g100_clc12_V18_5.tif')) as src:
        out_image, out_transform = mask(src, geoms, crop=True)
        out_image = out_image[0, :, :]
        tqdm.pandas(desc='CLC_2012_' + region)
        gdf_table['CLC_2012'] = gdf_table.centroid.progress_apply(
            lambda x: get_raster_value(x, out_image, out_transform))

    # obtain storm values for the sensitivity analysis storms
    if len(sens_analysis_storms) > 0:
        storm_list = load_sens_analysis_storms(sens_analysis_storms)
        for outrast_storm in storm_list:
            storm_name = str(
                int2date(get_num(outrast_storm[-23:].split('_')[0][:-2])))
            tqdm.pandas(desc=storm_name + '_' + region)
            with rio.open(outrast_storm) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_image = out_image[0, :, :]
                gdf_table[storm_name] = gdf_table.centroid.progress_apply(
                    lambda x: get_raster_value(x, out_image, out_transform))

    # obtain storm values for the historical storms
    elif (include_storms == True) & (event_set == False):
        storm_list = get_storm_list(data_path)
        for outrast_storm in storm_list:
            storm_name = str(
                int2date(get_num(outrast_storm[-23:].split('_')[0][:-2])))
            tqdm.pandas(desc=storm_name + '_' + region)
            with rio.open(outrast_storm) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_image = out_image[0, :, :]
                gdf_table[storm_name] = gdf_table.centroid.progress_apply(
                    lambda x: get_raster_value(x, out_image, out_transform))
                # clip implausible wind speeds
                gdf_table[storm_name].loc[gdf_table[storm_name] < 0] = 0
                gdf_table[storm_name].loc[gdf_table[storm_name] > 500] = 0

    # obtain storm values for the event set storms
    elif (include_storms == True) & (event_set == True):
        storm_list = get_event_storm_list(data_path)[:10]
        for outrast_storm in tqdm(storm_list,
                                  total=len(storm_list),
                                  desc=region):
            storm_name = str(
                int2date(get_num(outrast_storm[-24:].split('_')[0][:-4])))
            with rio.open(outrast_storm) as src:
                out_image = src.read(1)
                out_transform = src.transform
                gdf_table[storm_name] = gdf_table.centroid.apply(
                    lambda x: get_raster_value(x, out_image, out_transform))

    if save:
        df_exposure = pd.DataFrame(gdf_table)
        df_exposure.to_csv(
            os.path.join(data_path, 'output_exposure', country,
                         '{}_exposure.csv'.format(region)))

    print('Obtained all storm information for {}'.format(region))

    return gdf_table
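# Hedged sketch of the mask-then-sample pattern used above. get_raster_value
# is defined elsewhere in this codebase; presumably it maps a centroid to a
# pixel via the raster's affine transform, roughly like this (an assumption,
# not the original implementation):
import rasterio as rio

def sample_raster(point, band, transform):
    # convert point coordinates to array indices using the clipped raster's
    # affine transform, then read the pixel value
    row, col = rio.transform.rowcol(transform, point.x, point.y)
    return band[row, col]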
def query_league_start_gw(league_id):
    config = load_config()
    data_scraper = DataScraper(config)
    start_gw = data_scraper.get_league_start_gameweek(league_id)
    return start_gw
def transfer_optimizer(df_leads, manager_id, num_transfers, model_name):
    df_leads["name"] = df_leads["name"].apply(
        lambda x: str(x).encode('ascii', 'ignore'))
    config = load_config()
    data_loader = DataLoader(config)
    df_team = pd.DataFrame(
        data_loader.get_manager_current_gw_picks(manager_id))
    df_team = df_team.rename(columns={"element": "player_id"})
    bank = data_loader.get_manager_bank_balance(manager_id)
    df_cost = df_leads[["player_id", "cost", "name", model_name]].copy()
    df_team = pd.merge(df_team, df_cost, how='inner', on='player_id')
    prev_score = df_team[model_name].sum()
    budget = df_team["cost"].sum() + bank

    # optimization
    df = df_leads.copy()
    df = df.pipe(add_position_dummy)
    df = df.pipe(add_team_dummy)
    players = df["name"].unique().tolist()
    current_players = df_team["name"].unique().tolist()

    fpl_problem = pulp.LpProblem('FPL_Transfers', pulp.LpMaximize)

    # one binary decision variable per player: 1 = in the new squad
    x = pulp.LpVariable.dict('x_%s',
                             players,
                             lowBound=0,
                             upBound=1,
                             cat=pulp.LpInteger)

    # player score data
    player_points = dict(zip(df["name"], np.array(df[model_name])))

    # objective function: maximise the predicted points of the squad
    fpl_problem += sum([player_points[i] * x[i] for i in players])

    # constraints
    position_names = ['gk', 'def', 'mid', 'fwd']
    formation = '2-5-5-3'
    position_constraints = [int(i) for i in formation.split('-')]
    constraints = dict(zip(position_names, position_constraints))
    constraints['total_cost'] = budget
    constraints['team'] = 3
    # keeping 15 - num_transfers current players caps the number of transfers
    constraints["num_keep"] = 15 - num_transfers

    player_cost = dict(zip(df["name"], df["cost"]))
    player_position = dict(zip(df["name"], df["position"]))
    player_gk = dict(zip(df["name"], df["is_gk"]))
    player_def = dict(zip(df["name"], df["is_def"]))
    player_mid = dict(zip(df["name"], df["is_mid"]))
    player_fwd = dict(zip(df["name"], df["is_fwd"]))

    # apply the constraints
    fpl_problem += sum([player_cost[i] * x[i] for i in players
                        ]) <= float(constraints['total_cost'])
    fpl_problem += sum([player_gk[i] * x[i]
                        for i in players]) == constraints['gk']
    fpl_problem += sum([player_def[i] * x[i]
                        for i in players]) == constraints['def']
    fpl_problem += sum([player_mid[i] * x[i]
                        for i in players]) == constraints['mid']
    fpl_problem += sum([player_fwd[i] * x[i]
                        for i in players]) == constraints['fwd']
    fpl_problem += sum([x[i] for i in current_players
                        ]) == constraints['num_keep']

    # team constraints: at most three players per club
    for t in df["team"].unique():
        player_team = dict(zip(df["name"], df['team_' + str(t).lower()]))
        fpl_problem += sum([player_team[i] * x[i]
                            for i in players]) <= constraints['team']

    # solve the thing
    fpl_problem.solve()

    total_points = 0.
    total_cost = 0.
    optimal_squad = []
    for p in players:
        if x[p].value() != 0:
            total_points += player_points[p]
            total_cost += player_cost[p]
            optimal_squad.append({
                'name': p,
                'position': player_position[p],
                'cost': player_cost[p],
                'points': player_points[p]
            })

    solution_info = {
        'formation': formation,
        'total_points': total_points,
        'total_cost': total_cost
    }

    df_squad = pd.DataFrame(optimal_squad)
    now_score = df_squad["points"].sum()
    new_squad = set(df_squad["name"].unique().tolist())
    current_players = set(current_players)
    transfer_in = list(new_squad.difference(current_players))
    transfer_out = list(current_players.difference(new_squad))
    transfer_in = [in_player.decode('utf-8') for in_player in transfer_in]
    transfer_out = [out_player.decode('utf-8') for out_player in transfer_out]

    df_res = pd.DataFrame()
    gain = [0 for i in range(len(transfer_in))]
    gain[-1] = now_score - prev_score
    df_res["Transfer In"] = transfer_in
    df_res["Transfer Out"] = transfer_out
    df_res["gain"] = gain
    df_res["gain"] = df_res["gain"].round(2)
    df_res["gain"] = df_res["gain"].astype(str)
    df_res["gain"] = df_res["gain"].apply(lambda y: ""
                                          if int(float(y)) == 0 else y)
    df_res = df_res.rename(columns={"gain": "Gain"})
    return df_res
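# Minimal, self-contained sketch of the selection pattern used above: binary
# variables per player, maximise predicted points under budget and squad
# constraints. Toy data, not real FPL inputs.
import pulp

# name: (cost, predicted points, position)
toy = {'A': (4.5, 5.2, 'mid'), 'B': (6.0, 6.1, 'mid'), 'C': (5.5, 4.0, 'fwd')}
prob = pulp.LpProblem('toy_squad', pulp.LpMaximize)
x = pulp.LpVariable.dicts('x', toy.keys(), cat=pulp.LpBinary)
prob += pulp.lpSum(toy[p][1] * x[p] for p in toy)           # objective: points
prob += pulp.lpSum(toy[p][0] * x[p] for p in toy) <= 10.5   # budget cap
prob += pulp.lpSum(x[p] for p in toy if toy[p][2] == 'mid') <= 1  # max one mid
prob.solve()
print([p for p in toy if x[p].value() == 1])                # -> ['A', 'C']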
def losses(country, parallel=True, event_set=False, save=True):
    """
    Creation of the loss table of the specified country.

    Arguments:
        *country* (string) -- ISO2 code of country to consider.

        *parallel* (bool) -- calculates all regions within a country in
        parallel. Set to False if you have little capacity on the machine
        (default: **True**).

        *event_set* (bool) -- if set to True, we will calculate the losses for
        the event set instead of the historical storms (default: **False**).

        *save* (bool) -- boolean to decide whether you want to save the output
        to a csv file (default: **True**).

    Returns:
        *GeoDataframe* -- Geopandas dataframe with all buildings of the
        country and their **losses** for each wind storm.
    """
    # make sure the country inserted is an ISO2 country name for the remainder of the analysis
    #country = coco.convert(names=country, to='ISO2')

    # get data path
    data_path = load_config()['paths']['data']

    # create country poly files
    poly_files(data_path, country)

    # download OSM file if it is not there yet
    download_osm_file(country)

    # load sample
    sample = load_sample(country)

    # get list of regions for which we have poly files (should be all)
    regions = os.listdir(os.path.join(data_path, country, 'NUTS3_POLY'))
    regions = [x.split('.')[0] for x in regions]

    # keep the scalar event_set flag intact (the original overwrote it with a
    # list, which made the save check below always fail); build the per-region
    # argument lists separately
    event_set_list = len(regions) * [event_set]
    samples = len(regions) * [sample]

    if parallel:
        with Pool(cpu_count() - 2) as pool:
            country_table = pool.starmap(region_losses,
                                         zip(regions, event_set_list,
                                             samples),
                                         chunksize=1)
    else:
        country_table = []
        for region in regions:
            country_table.append(region_losses(region, event_set, sample))

    if (save == True) & (event_set == False):
        gdf_table = gpd.GeoDataFrame(pd.concat(country_table),
                                     crs='epsg:4326',
                                     geometry='geometry')
        gdf_table.drop(['centroid'], axis='columns', inplace=True)
        gdf_table.to_file(
            os.path.join(data_path, 'losses_country',
                         '{}_losses.shp'.format(country)))

    return gpd.GeoDataFrame(pd.concat(country_table), crs='epsg:4326')
def exposure(country, include_storms=True, parallel=True, save=True):
    """
    Creation of the exposure table of the specified country.

    Arguments:
        *country* (string) -- ISO2 code of country to consider.

        *include_storms* (bool) -- if set to False, it will only return a list
        of buildings and their characteristics (default: **True**).

        *parallel* (bool) -- calculates all regions within a country in
        parallel. Set to False if you have little capacity on the machine
        (default: **True**).

        *save* (bool) -- boolean to decide whether you want to save the output
        to a csv file (default: **True**).

    Returns:
        *GeoDataframe* -- Geopandas dataframe with all buildings of the
        country and their potential exposure to wind
    """
    # make sure the country inserted is an ISO2 country name for the remainder of the analysis
    #country = coco.convert(names=country, to='ISO2')

    # get data path
    data_path = load_config()['paths']['data']

    # create country poly files
    poly_files(data_path, country)

    # download OSM file if it is not there yet
    download_osm_file(country)

    # get list of regions for which we have poly files (should be all)
    regions = os.listdir(os.path.join(data_path, country, 'NUTS3_POLY'))
    regions = [x.split('.')[0] for x in regions]

    # region_exposure derives the country from the region code, so only the
    # regions and the include_storms flag are passed on (the original passed
    # country as a positional argument, which does not match the signature of
    # region_exposure above)
    storms = len(regions) * [include_storms]

    if parallel:
        with Pool(cpu_count() - 2) as pool:
            country_table = pool.starmap(region_exposure,
                                         zip(regions, storms),
                                         chunksize=1)
    else:
        country_table = []
        for region in regions:
            country_table.append(
                region_exposure(region, include_storms=include_storms))

    if save:
        gdf_table = gpd.GeoDataFrame(pd.concat(country_table),
                                     crs='epsg:4326')
        gdf_table.drop(['centroid'], axis='columns', inplace=True)
        gdf_table.to_file(
            os.path.join(data_path, 'exposure_country', country,
                         '{}_exposure.shp'.format(country)))

    return gpd.GeoDataFrame(pd.concat(country_table), crs='epsg:4326')
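# Usage sketch: build the exposure table (buildings plus wind footprints) for
# one country without running the loss calculation:
gdf_exposure = exposure('LU', include_storms=True, parallel=True, save=True)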
def loss_per_sector(figure_output_path='test_sector.png'):
    '''This function is used to plot the total losses for the following
    sectors: Residential, Industrial/Commercial, Transport, Other uses,
    Agriculture.

    Arguments:
        *figure_output_path* (string) -- path to location where you want to
        save the figure

    Returns:
        *A saved figure*
    '''
    data_path = load_config()['paths']['data']

    sectors = ['res', 'ind_com', 'transport', 'other', 'agri']
    sect_names = [
        'Residential', 'Industrial/Commercial', 'Transport', 'Other uses',
        'Agriculture'
    ]
    countries = [
        'AT', 'BE', 'DK', 'FR', 'DE', 'IE', 'LU', 'NL', 'NO', 'SE', 'UK',
        'PL', 'IT', 'FI'
    ]
    country_names = [
        'Austria', 'Belgium', 'Denmark', 'France', 'Germany', 'Ireland',
        'Luxembourg', 'Netherlands', 'Norway', 'Sweden', 'United Kingdom',
        'Poland', 'Italy', 'Finland'
    ]
    cols_to_load = ['Storm'] + countries

    all_storm = pd.read_excel(os.path.join(data_path, 'output_storms.xlsx'),
                              sheet_name='total_losses')
    all_storm = all_storm[cols_to_load]
    all_storm['Storm'] = pd.to_datetime(all_storm['Storm'])
    all_storm.set_index('Storm', inplace=True)
    all_storm.rename(columns=dict(zip(countries, country_names)),
                     inplace=True)

    loss_per_year = all_storm.resample("A").sum()
    loss_per_year['Year'] = loss_per_year.index.year
    loss_per_year.set_index('Year', inplace=True)

    loss_per_sector = pd.DataFrame(columns=sectors,
                                   index=loss_per_year.index)
    for sect in sectors:
        sect_loss = pd.read_excel(os.path.join(data_path,
                                               'output_storms.xlsx'),
                                  sheet_name=sect + '_losses')
        sect_loss = sect_loss[cols_to_load]
        sect_loss['Storm'] = pd.to_datetime(sect_loss['Storm'])
        sect_loss.set_index('Storm', inplace=True)
        inb_sec = sect_loss.resample("A").sum()
        inb_sec = inb_sec.sum(axis=1)
        loss_per_sector[sect] = np.array(inb_sec)

    # rename to full sector names
    loss_per_sector.rename(columns=dict(zip(sectors, sect_names)),
                           inplace=True)

    fig, ax_ys = plt.subplots(figsize=(10, 8))
    loss_per_sector.plot.bar(ax=ax_ys, stacked=True, width=0.9, ec="w",
                             lw=0.1, colormap="Paired")
    plt.setp(ax_ys.get_xticklabels(), rotation=80)
    ax_ys.set_xlabel("Years", fontweight='bold')
    ax_ys.set_ylabel("Loss in million dollar", fontweight='bold')
    ax_ys.set_yticks(np.arange(0, 26000, 2500), minor=False)
    ax_ys.set_ylim(0, 25000)
    ax_ys.legend(loc='upper right', frameon=True, prop={'size': 12})
    ax_ys.patch.set_facecolor('0.98')

    plt.savefig(figure_output_path, dpi=600, bbox_inches='tight')
def region_losses(region, storm_event_set=False, sample=(5, 0, 95, 20, 80)):
    """Estimation of the losses for all buildings in a region for a
    pre-defined list of storms.

    Arguments:
        *region* (string) -- nuts code of region to consider.

        *storm_event_set* (bool) -- if set to True, the losses are calculated
        for the synthetic event set instead of the historical storms
        (default: **False**).

        *sample* (tuple) -- tuple of parameter values. This is a dummy
        placeholder, should be filled with either **load_sample(country)**
        values or **sens_analysis_param_list**.

    Returns:
        *pandas Dataframe* -- pandas dataframe with all buildings of the
        region and their **losses** for each wind storm.
    """
    data_path = load_config()['paths']['data']
    country = region[:2]

    # load storms
    if storm_event_set == False:
        storm_list = get_storm_list(data_path)
        storm_name_list = [
            str(int2date(get_num(x[-23:].split('_')[0][:-2])))
            for x in storm_list
        ]
    else:
        storm_list = get_event_storm_list(data_path)
        storm_name_list = [
            str(int2date(get_num(x[-24:].split('_')[0][:-4])))
            for x in storm_list
        ]

    # load maximum damages
    max_dam = load_max_dam(data_path)

    # load damage curves
    curves = load_curves(data_path)

    output_table = region_exposure(region,
                                   include_storms=True,
                                   event_set=storm_event_set)

    no_storm_columns = list(
        set(output_table.columns).difference(list(storm_name_list)))
    write_output = pd.DataFrame(output_table[no_storm_columns])

    # calculate losses for buildings in this NUTS region
    for storm in storm_name_list:
        write_output[storm] = loss_calculation(storm, country, output_table,
                                               max_dam, curves, sample)

    df_losses = pd.DataFrame(write_output)

    # save this regional file
    if storm_event_set == False:
        df_losses.to_csv(
            os.path.join(data_path, 'output_losses', country,
                         '{}_losses.csv'.format(region)))
        print('Finished with loss calculation for {}'.format(region))
        return gpd.GeoDataFrame(write_output)

    else:
        # numpify the event-set losses
        pdZ = np.array(df_losses[storm_name_list], dtype=int)
        write_output.drop(storm_name_list, axis=1, inplace=True)

        output_ = []
        for row in pdZ:
            # empirical CDF of the losses per building, integrated to a
            # single risk value (density=True replaces the deprecated
            # normed=True)
            H, X1 = np.histogram(row, bins=100, density=True)
            dx = X1[1] - X1[0]
            F1 = np.cumsum(np.append(0, H)) * dx
            output_.append(metrics.auc(X1, F1))

        df_losses['Risk'] = output_
        df_losses.to_csv(
            os.path.join(data_path, 'output_risk', country,
                         '{}_risk.csv'.format(region)))
        print('Finished with risk calculation for {}'.format(region))
        return gpd.GeoDataFrame(write_output)
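# Worked sketch of the risk aggregation at the end of region_losses: per
# building, the losses over the event set are binned into an empirical CDF
# and integrated with the trapezoidal rule (sklearn's metrics.auc), giving
# one expected-loss style 'Risk' number per building. Toy numbers below.
import numpy as np
from sklearn import metrics

row = np.array([0, 0, 1200, 300, 50])         # losses per event (toy)
H, X1 = np.histogram(row, bins=100, density=True)
dx = X1[1] - X1[0]
F1 = np.cumsum(np.append(0, H)) * dx          # empirical CDF on the bin edges
print(metrics.auc(X1, F1))                    # area under the CDF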