def create_gdp_viz():
    """Build the GDP-per-capita time-slider choropleth and save it as
    'GDP_viz.html' in the webapp templates folder.

    Reads 'borders_geo.json' and 'GDP_per_capita_world_data.csv' from the
    data directory, keeps only countries present in both sources, assigns
    each country one color per year (1960-2019) on a geometrically spaced
    GDP scale, and renders a folium TimeSliderChoropleth with a legend.
    """
    # --- Load and pre-process the geojson file ---
    geojson_path = os.path.normpath(
        os.path.join(script_dir_path, '..', 'data', 'borders_geo.json'))
    world_geojson = gpd.read_file(geojson_path)
    world_geojson.drop(columns=['ISO_A2', 'ADMIN'], inplace=True)
    # '-99' is the placeholder ISO_A3 code for unassigned territories
    world_geojson.drop(world_geojson[world_geojson['ISO_A3'] == '-99'].index,
                       inplace=True)
    country_list = world_geojson['ISO_A3'].tolist()

    # --- Load and pre-process the GDP data ---
    df_GDP_path = os.path.normpath(
        os.path.join(script_dir_path, '..', 'data',
                     'GDP_per_capita_world_data.csv'))
    # skiprows=4: leading metadata rows in the CSV before the header
    df_GDP = pd.read_csv(df_GDP_path, index_col='Country Code', skiprows=4)
    # Drop unnecessary data
    df_GDP.drop(labels='2020', axis=1, inplace=True)
    # Keep only countries present in BOTH the geojson and the CSV
    csv_country_list = df_GDP.index.tolist()
    country_list = list(set(country_list).intersection(csv_country_list))
    df_GDP.drop(df_GDP[~df_GDP.index.isin(country_list)].index, inplace=True)
    world_geojson.drop(
        world_geojson[~world_geojson['ISO_A3'].isin(country_list)].index,
        inplace=True)
    country_list.sort()

    # Create an enumerated country dict for id mapping
    country_dict = {k: v for v, k in enumerate(country_list)}
    world_geojson['country_id'] = world_geojson['ISO_A3'].map(country_dict)

    # Min and max GDP values across the year columns
    # (columns[4:] skips the leading non-year columns of the World Bank
    # CSV — TODO confirm the exact column layout against the data file)
    min_GDP_val = df_GDP[df_GDP.columns[4:]].min().min()
    max_GDP_val = df_GDP[df_GDP.columns[4:]].max().max()

    # Grey 'No data' entry followed by an 11-step red-to-green scale
    color_list = [
        '#808080', '#A50026', '#D73027', '#F46D43', '#FDAE61', '#FEE08B',
        '#FFFFBF', '#D9EF8B', '#A6D96A', '#66BD63', '#1A9850', '#006837'
    ]

    # Geometrically spaced bin edges over the min-max interval
    bins = np.geomspace(min_GDP_val, max_GDP_val, 12)
    # Records with no data available become -1 ...
    df_GDP.fillna(-1, inplace=True)
    # ... and fall into a dedicated 'No data' bin below the real minimum
    bins = np.insert(bins, 0, -1.)
    bins = bins.tolist()

    # Append 'color_[year]' columns to the GDP DataFrame.
    # FIX: reuse color_list as the labels instead of repeating the literal
    # hex list; for-range instead of a manual while counter.
    for year in range(1960, 2020):
        pasted_col_id = df_GDP.columns.get_loc(str(year)) + 1
        col_value = pd.cut(df_GDP[str(year)],
                           bins,
                           include_lowest=True,
                           labels=color_list)
        df_GDP.insert(loc=pasted_col_id,
                      column='color_' + str(year),
                      value=col_value)
    print(df_GDP)

    # Build the styledict TimeSliderChoropleth expects:
    # {country_id: {epoch_timestamp: {'color': ..., 'opacity': ...}}}
    # NOTE(review): time.mktime yields epoch *seconds* despite the
    # 'year_in_ms' name — the keys worked as-is, so only the name misleads.
    gdp_dict = {}
    for country_code in df_GDP.index.tolist():
        country_id = str(country_dict[country_code])
        gdp_dict[country_id] = {}
        for year in range(1960, 2020):
            dt_obj = datetime(year=year, month=12, day=31)
            year_in_ms = str(time.mktime(dt_obj.timetuple()))
            color_hex = df_GDP.at[country_code, 'color_' + str(year)]
            gdp_dict[country_id][year_in_ms] = {
                'color': color_hex,
                'opacity': 0.7
            }

    # --- Initialize the map ---
    map_GDP = folium.Map(location=[0, 0],
                         zoom_start=4,
                         max_bounds=True,
                         min_zoom=3)

    # --- Create the map content and add it to the map object ---
    choropleth = TimeSliderChoropleth(
        world_geojson.set_index('country_id').to_json(), styledict=gdp_dict)
    choropleth.add_to(map_GDP)

    # Map legend: first color = 'No data', last = open-ended top bin,
    # every other color a closed interval between consecutive bin edges.
    legend_labels_dict = {}
    for i, color in enumerate(color_list):
        if i == 0:
            legend_labels_dict[color] = 'No data'
        elif i == len(color_list) - 1:
            legend_labels_dict[color] = '> ' + str(round(bins[i], 2)) + '$'
        else:
            legend_labels_dict[color] = str(round(
                bins[i], 2)) + '$' + ' - ' + str(round(bins[i + 1], 2)) + '$'
    template = utils.create_legend(caption='GDP per capita in USD',
                                   legend_labels=legend_labels_dict)
    macro = MacroElement()
    macro._template = Template(template)
    map_GDP.get_root().add_child(macro)

    # --- Save completed map viz to an appropriate folder ---
    map_GDP.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'GDP_viz.html'))
    print('Successfully created the GDP viz!')
def create_sf_crime_viz():
    """Render last week's San Francisco crime incidents as a clustered
    marker map and save it as 'SF_crime_viz.html'."""
    # --- Load and pre-process the San Francisco crime data ---
    df_crime = pd.read_csv(
        os.path.join(data_dir_path, 'last_week_SF_crimes.csv'))
    # Keep only incidents that carry usable coordinates
    df_crime = df_crime[df_crime['latitude'].notna()]
    df_crime = df_crime[df_crime['longitude'].notna()]

    # Build one popup and one [lat, lon] pair per incident
    popups_list, locations_list = [], []
    for _, row in df_crime.iterrows():
        # Drop the 'T' separator and the trailing fractional seconds
        incident_timestamp = row['incident_datetime'].replace('T', ' ')[:-7]
        popup_content = (
            '<strong>Timestamp: </strong>' + incident_timestamp + '<br>'
            + '<strong>Day of the week: </strong>'
            + row['incident_day_of_week'] + '<br>'
            + '<strong>Description: </strong>' + row['incident_description'])
        popups_list.append(folium.Popup(html=popup_content))
        locations_list.append(
            row[['latitude', 'longitude']].to_numpy().tolist())

    # --- Initialize the map, bounded around the San Francisco area ---
    map_crime = folium.Map(location=[37.773972, -122.431297],
                           zoom_start=11,
                           max_bounds=True,
                           min_zoom=9,
                           max_lat=38.5,
                           max_lon=-122,
                           min_lat=37,
                           min_lon=-123)

    # --- Create the map content and add it to the map object ---
    # One identical warning icon per incident location
    icon_list = [
        folium.Icon(icon='exclamation', prefix='fa', color='orange')
        for _ in locations_list
    ]
    marker_cluster = MarkerCluster(locations=locations_list,
                                   popups=popups_list,
                                   icons=icon_list)
    marker_cluster.add_to(map_crime)

    # --- Create the map legend for the reported one-week window ---
    current_timestamp = datetime.now() - timedelta(days=1)
    week_before = current_timestamp - timedelta(weeks=1)
    current_timestamp = current_timestamp.strftime('%Y-%m-%d')
    week_before = week_before.strftime('%Y-%m-%d')
    template = utils.create_legend(caption='San Francisco crimes between ' +
                                   week_before + ' and ' + current_timestamp)
    macro = MacroElement()
    macro._template = Template(template)
    map_crime.get_root().add_child(macro)

    # --- Save completed map viz to an appropriate folder ---
    map_crime.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'SF_crime_viz.html'))
    print('Successfully created the San Francisco crime viz!')
def main():
    """Produce the 3-lepton control-region E_T^miss plot ('plot_3lcr_2l2v')
    from 'inputs/MTW_all_met_tst.root': a re-styled stacked MC prediction
    with data, uncertainty band and a Data/Prediction ratio pad."""
    ### Main definitions
    ROOT.gROOT.SetBatch()
    ###
    canvas, c_top, c_bottom = utils.create_double_pad('plot_3lcr_2l2v')
    ###
    root_file = ROOT.TFile('inputs/MTW_all_met_tst.root')
    stack = utils.re_style_top(
        root_file.Get('c1').GetPrimitive('pad1').GetPrimitive('hs1'))
    stack.GetXaxis().SetTitle('#it{E}_{T}^{miss} [GeV]')
    stack.GetYaxis().SetTitle('Events / 30 GeV')
    hists = {}
    merging_scheme = utils.complex_merging
    # Only referenced by disabled code below; kept for future use
    full_stack = stack.GetStack().Last()
    # Identify each component histogram by its original fill color
    for hist in stack.GetHists():
        prev_color = hist.GetFillColor()
        sample_name = None
        for key, vals in utils.original_colors_2l2v.iteritems():
            if prev_color in vals:
                sample_name = key
        if not sample_name:
            # NOTE(review): the original had a bare string expression here
            # ('Sample Name Not Found'), which is a no-op — most likely a
            # missing print. Kept as a no-op to preserve behavior.
            pass
        else:
            # for merging_key, merging_list in merging_scheme.iteritems():
            #     if key in merging_list:
            #         print sample_name, ' is supposed to be merged into ', merging_key
            hists[sample_name] = utils.re_shape_tail(hist)
            # a = hist.GetXaxis()
            # print
            # print a.GetNbins()
            # print a.GetXmin()
            # print a.GetXmax()
    ### Getting complicated here...
    # Snapshot the keys: hists is mutated while merging below.
    loop_over_this = list(hists.keys())
    # FIX: copy into a local set — the original aliased utils.wjets_removal
    # and then .add()-ed to it, mutating shared module-level state.
    to_be_removed = set(utils.wjets_removal)
    for key in loop_over_this:
        for merging_key, merging_list in merging_scheme.iteritems():
            if key in merging_list:
                to_be_removed.add(key)
                if merging_key in hists:
                    hists[merging_key].Add(hists[key])
                else:
                    hists[merging_key] = hists[key].Clone(
                        'another_clone_' + hists[key].GetName())
    # Re-style all histograms; only samples surviving the merge are kept
    to_be_used = []
    for k in hists:
        hist = hists[k]
        hist.SetFillColor(utils.get_colors(k))
        hist.SetLineColor(ROOT.kBlack)
        hist.SetLineWidth(1)
        hist.SetTitle(utils.titles[k])
        if not k in to_be_removed:
            to_be_used.append((k, hist.Integral()))
    # Stack smallest-first; legend lists largest-first
    sample_list = sorted(to_be_used, key=lambda x: x[1])
    sample_list_r = sorted(to_be_used, key=lambda x: x[1], reverse=True)
    new_stack = ROOT.THStack(stack.GetName() + '_clone', '')
    for name, integral in sample_list:
        new_stack.Add(hists[name])
    ###
    data = utils.re_style_top(
        utils.th1_to_tgraph(
            root_file.Get('c1').GetPrimitive('pad1').GetPrimitive(
                'met_tst_MTW_all_Nominal'), True))
    # Re-shape the uncertainty histogram; also returns the last-bin ratio
    # used to scale the bottom-pad systematic band consistently
    prev_error, last_bin_ratio = utils.re_shape_tail(
        root_file.Get('c1').GetPrimitive('pad1').GetPrimitive('h0'), True)
    error = utils.re_style_top(utils.th1_to_tgraph(prev_error))
    error.SetMarkerStyle(1)
    error.SetFillColor(ROOT.kBlack)
    error.SetFillStyle(3345)
    ###
    ratio_axis = utils.re_style_bot(
        root_file.Get('c1').GetPrimitive('pad2').GetPrimitive('h3'))
    ratio = utils.re_style_bot(
        utils.th1_to_tgraph(
            root_file.Get('c1').GetPrimitive('pad2').GetPrimitive('h3'),
            True))
    ratio_axis.GetXaxis().SetTitle('#it{E}_{T}^{miss} [GeV]')
    ratio_axis.GetYaxis().SetTitle('#frac{Data}{Prediction}')
    syst_band = utils.re_style_bot(
        utils.th1_to_tgraph(
            utils.change_last_bin(
                root_file.Get('c1').GetPrimitive('pad2').GetPrimitive('h0'),
                last_bin_ratio)))
    #ratio = utils.re_style_bot( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h3' ) )
    #ratio.GetXaxis().SetTitle( '#it{E}_{T}^{miss} [GeV]' )
    #ratio.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
    ##syst_band = utils.re_style_bot( utils.re_shape_tail( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h0' ), full_stack ) )
    #syst_band = utils.re_style_bot( utils.change_last_bin( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h0' ), last_bin_ratio ) )
    syst_band.SetMarkerStyle(1)
    syst_band.SetFillColor(ROOT.kBlack)
    syst_band.SetFillStyle(3345)
    ###
    # Top pad: draw once to create axes, re-style, then draw again
    c_top.cd()
    new_stack.SetMaximum(1000000)
    new_stack.SetMinimum(0.001)
    new_stack.Draw('hist')
    new_stack = utils.re_style_top(new_stack)
    new_stack.Draw('hist')
    new_stack.GetXaxis().SetTitle('#it{E}_{T}^{miss} [GeV]')
    new_stack.GetYaxis().SetTitle('Events / 30 GeV')
    new_stack.GetXaxis().SetRangeUser(0, 600)
    error.Draw('e2 same')
    data.Draw('pe')
    x_l1, y_l1, latex1 = utils.draw_latex(utils.lumi, True)
    latex1.DrawLatex(
        x_l1, y_l1 - 0.12,
        utils.h_to_zz_to + utils.lepp + utils.lepm + utils.nu + utils.nubar +
        utils.void_char + utils.inv_prime)
    latex1.DrawLatex(x_l1, y_l1 - 0.18, '3#it{l} Control Region')
    leg = utils.create_legend(len(sample_list_r), 0.3, 1)
    leg.AddEntry(data, 'Data', 'pe')
    for name, integral in sample_list_r:
        leg.AddEntry(hists[name], hists[name].GetTitle(), 'f')
    leg.AddEntry(error, 'Uncertainty', 'f')
    leg.Draw()
    #utils.patch_bar( 229. / 566., 234. / 566., 322. / 407., 322. / 407., True )
    c_top.SetLogy()
    # Bottom pad: ratio with systematic band and off-scale arrows
    c_bottom.cd()
    ratio_axis.Draw('axis')
    ratio_axis.GetXaxis().SetRangeUser(0, 600)
    syst_band.Draw('e2 same')
    ratio.Draw('pe0 same')
    c_bottom.Update()
    for arrow in utils.get_arrows(ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(),
                                  ratio):
        arrow.Draw()
    # Reference line at ratio == 1
    Line = ROOT.TLine(ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1)
    Line.SetLineColor(ROOT.kBlack)
    Line.SetLineWidth(2)
    Line.SetLineStyle(2)
    Line.Draw()
    #canvas.SaveAs( canvas.GetName() + '.pdf' )
    utils.save(canvas)
def make_plot( file_name, cat, title, maxYtop, minY = 0.02, maxY = 1.98 ):
    """Draw a stacked data/MC comparison plot from a 4l-channel ROOT file.

    file_name -- input ROOT file under 'inputs/' (also names the canvas)
    cat       -- category label drawn below the title on the top pad
    title     -- title text drawn on the top pad
    maxYtop   -- Y-axis maximum of the top (stack) pad
    minY/maxY -- Y range of the bottom (Data/Prediction) ratio pad
    """
    # Earlier ratio-range variants kept for reference:
    #def make_plot( file_name, cat, title, maxYtop, minY = 0.18, maxY = 1.82 ):
    #def make_plot( file_name, cat, title, maxYtop, minY = 0., maxY = 3.4 ):
    ###
    canvas, c_top, c_bottom = utils.create_double_pad( 'plot_%s' % file_name.replace( '.root', '' ) )
    ###
    root_file = ROOT.TFile( 'inputs/' + file_name )
    # The stacked MC prediction is the 'combined' THStack inside pad 'p1_ratio'
    stack = utils.re_style_top( root_file.Get( 'ratio' ).GetPrimitive( 'p1_ratio' ).GetPrimitive( 'combined' ) )
    hists = {}
    merging_scheme = {} #utils.complex_merging
    # Identify each component histogram by its original fill color
    for hist in stack.GetHists():
        prev_color = hist.GetFillColor()
        sample_name = None
        for key, vals in utils.original_colors_4l.iteritems():
            if prev_color in vals:
                sample_name = key
        # NOTE(review): sample_name stays None when no color matches; the
        # disabled block below used to restyle per-sample right here.
        '''
        if not sample_name:
            'Sample Name Not Found'
        else:
            hist.SetFillColor( utils.get_colors( sample_name ) )
            #hist.SetLineColor( utils.colors[ sample_name ] )
            hist.SetLineColor( ROOT.kBlack )
            hist.SetLineWidth( 1 )
            hist.SetTitle( utils.titles[ sample_name ] )
        '''
        hists[ sample_name ] = hist
    ### Getting complicated here...
    # Merge samples per merging_scheme (currently empty, so this is a no-op);
    # merged source samples are flagged for removal from the final stack.
    loop_over_this = hists.keys()
    to_be_removed = set()
    for key in loop_over_this:
        for merging_key, merging_list in merging_scheme.iteritems():
            if key in merging_list:
                to_be_removed.add( key )
                if merging_key in hists:
                    hists[ merging_key ].Add( hists[ key ] )
                else:
                    hists[ merging_key ] = hists[ key ].Clone( 'another_clone_' + hists[ key ].GetName() )
    # Re-style every histogram; collect (name, integral) for surviving
    # samples. 'Red' gets a -1 penalty — presumably to nudge its position
    # in the integral-sorted stack order; confirm before changing.
    to_be_used = []
    for k in hists:
        hist = hists[ k ]
        hist.SetFillColor( utils.get_colors( k ) )
        hist.SetLineColor( ROOT.kBlack )
        hist.SetLineWidth( 1 )
        hist.SetTitle( utils.titles[ k ] )
        if not k in to_be_removed:
            penalty = 0
            if k == 'Red':
                penalty = -1
            to_be_used.append( ( k, hist.Integral() + penalty ) )
    # Stack smallest-first; legend lists largest-first
    sample_list = sorted( to_be_used, key = lambda x: x[ 1 ] )
    sample_list_r = sorted( to_be_used, key = lambda x: x[ 1 ], reverse = True )
    new_stack = ROOT.THStack( stack.GetName() + '_clone', '' )
    for name, integral in sample_list:
        new_stack.Add( hists[ name ] )
    # Pick the data and uncertainty graphs off the top pad; both are named
    # 'Graph' and are told apart by utils.is_data
    data, error = None, None
    for element in root_file.Get( 'ratio' ).GetPrimitive( 'p1_ratio' ).GetListOfPrimitives():
        if element.GetName() == 'Graph':
            if not utils.is_data( element ):
                error = utils.re_style_top( element )
                error.SetMarkerStyle( 1 )
                error.SetFillColor( ROOT.kBlack )
                error.SetFillStyle( 3345 )
            else:
                data = utils.re_style_top( element )
    ###
    # Bottom pad inputs: data/MC ratio and its systematic band
    ratio = utils.re_style_bot( root_file.Get( 'ratio' ).GetPrimitive( 'p2_ratio' ).GetPrimitive( 'dataRatio' ) )
    syst_band = utils.re_style_bot( root_file.Get( 'ratio' ).GetPrimitive( 'p2_ratio' ).GetPrimitive( 'sysGraphRatio' ), minY, maxY )
    syst_band.SetMarkerStyle( 1 )
    syst_band.SetFillColor( ROOT.kBlack )
    syst_band.SetFillStyle( 3345 )
    syst_band.GetXaxis().SetTitle( utils.m4l ) #'#it{m}_{4l} [GeV]' )
    syst_band.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
    ###
    # Top pad: draw once to create axes, re-style, then draw again
    c_top.cd()
    new_stack.SetMinimum( 0.003 )
    new_stack.Draw( 'hist' )
    new_stack = utils.re_style_top( new_stack )
    new_stack.Draw( 'hist' )
    #new_stack.GetXaxis().SetRangeUser( 250, 1500 )
    new_stack.GetXaxis().SetTitle( utils.m4l ) #'#it{m}_{4l} [GeV]' )
    new_stack.GetYaxis().SetTitle( 'Events / %d GeV' % int( stack.GetXaxis().GetBinWidth( 1 ) ) )
    new_stack.SetMaximum( maxYtop )
    error.Draw( 'e2' )
    data.Draw( 'p same' )
    x_l1, y_l1, latex1 = utils.draw_latex( utils.lumi, True )
    latex1.DrawLatex( x_l1, y_l1 - 0.12, title + utils.void_char )
    latex1.DrawLatex( x_l1, y_l1 - 0.18, cat + utils.void_char )
    leg = utils.create_legend( len( sample_list_r ), 0.3, 1 )
    leg.AddEntry( data, 'Data', 'pe' )
    for name, integral in sample_list_r:
        leg.AddEntry( hists[ name ], hists[ name ].GetTitle(), 'f' )
    leg.AddEntry( error, 'Uncertainty', 'f' )
    #leg.AddEntry( None, '', '' )
    #leg.AddEntry( None, '#color[0]{' + utils.titles[ 'emu' ] + '}', '' )
    leg.Draw()
    c_top.Update()
    # Remember the top pad's X range so the bottom pad can match it
    minX, maxX = ROOT.gPad.GetUxmin(), ROOT.gPad.GetUxmax()
    c_top.SetLogy()
    # Bottom pad: systematic band, ratio points, off-scale arrows
    c_bottom.cd()
    syst_band.Draw( 'ae2' )
    syst_band.GetXaxis().SetRangeUser( minX, maxX )
    ratio.Draw( 'pe0 same' )
    c_bottom.Update()
    for arrow in utils.get_arrows( ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(), ratio ):
        arrow.Draw()
    # Reference line at ratio == 1
    Line = ROOT.TLine( ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1 )
    Line.SetLineColor( ROOT.kBlack )
    Line.SetLineWidth( 2 )
    Line.SetLineStyle( 2 )
    Line.Draw()
    utils.save( canvas )
def make_plot( file_name, cat, title, maxYtop, minY = 0.18, maxY = 1.82 ):
    """Draw a stacked data/MC m_T plot for a 2l2v signal-region category.

    NOTE(review): this redefines make_plot — only the last definition in
    the module is callable at runtime.

    file_name -- input ROOT file under 'inputs/'
    cat       -- channel tag (e.g. 'ee'); selects the 'mT_<cat>_Nominal'
                 data histogram and names the canvas
    title     -- title text drawn on the top pad
    maxYtop   -- Y-axis maximum of the top (stack) pad
    minY/maxY -- Y range of the bottom (Data/Prediction) ratio pad
    """
    ###
    canvas, c_top, c_bottom = utils.create_double_pad( 'plot_sr_' + cat + 'vv' )
    ###
    root_file = ROOT.TFile( 'inputs/' + file_name )
    stack = utils.re_style_top( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'hs1' ) )
    hists = {}
    merging_scheme = utils.complex_merging
    # Identify each component histogram by its original fill color
    for hist in stack.GetHists():
        prev_color = hist.GetFillColor()
        sample_name = None
        for key, vals in utils.original_colors_2l2v.iteritems():
            if prev_color in vals:
                sample_name = key
        # Disabled per-sample restyling kept for reference:
        '''
        if not sample_name:
            'Sample Name Not Found'
        else:
            hist.SetFillColor( utils.get_colors( sample_name ) )
            #hist.SetLineColor( utils.colors[ sample_name ] )
            hist.SetLineColor( ROOT.kBlack )
            hist.SetLineWidth( 1 )
            hist.SetTitle( utils.titles[ sample_name ] )
        '''
        hists[ sample_name ] = hist
    ### Getting complicated here...
    # Merge samples per merging_scheme; merged sources are dropped from
    # the final stack.
    loop_over_this = hists.keys()
    to_be_removed = set()  #utils.wjets_removal
    for key in loop_over_this:
        for merging_key, merging_list in merging_scheme.iteritems():
            if key in merging_list:
                to_be_removed.add( key )
                if merging_key in hists:
                    hists[ merging_key ].Add( hists[ key ] )
                else:
                    hists[ merging_key ] = hists[ key ].Clone( 'another_clone_' + hists[ key ].GetName() )
    # Re-style every histogram; collect (name, integral) for surviving
    # samples. 'Wjets' gets a -1 penalty — presumably to nudge its position
    # in the integral-sorted stack order; confirm before changing.
    to_be_used = []
    for k in hists:
        hist = hists[ k ]
        hist.SetFillColor( utils.get_colors( k ) )
        hist.SetLineColor( ROOT.kBlack )
        hist.SetLineWidth( 1 )
        hist.SetTitle( utils.titles[ k ] )
        if not k in to_be_removed:
            penalty = 0
            if k == 'Wjets':
                penalty = -1
            to_be_used.append( ( k, hist.Integral() + penalty ) )
    # Stack smallest-first; legend lists largest-first
    sample_list = sorted( to_be_used, key = lambda x: x[ 1 ] )
    sample_list_r = sorted( to_be_used, key = lambda x: x[ 1 ], reverse = True )
    new_stack = ROOT.THStack( stack.GetName() + '_clone', '' )
    for name, integral in sample_list:
        new_stack.Add( hists[ name ] )
    data = utils.re_style_top( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'mT_' + cat + '_Nominal' ), True ) )
    utils.print_contents( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'mT_' + cat + '_Nominal' ) )
    error = utils.re_style_top( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'h0' ) ) )
    error.SetMarkerStyle( 1 )
    error.SetFillColor( ROOT.kBlack )
    error.SetFillStyle( 3345 )
    ###
    # Bottom pad inputs: axis holder, data/MC ratio and systematic band
    ratio_axis = utils.re_style_bot( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h3' ), minY, maxY )
    ratio = utils.re_style_bot( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h3' ), True ), minY, maxY )
    ratio_axis.GetXaxis().SetTitle( '#it{m}_{T}^{' + utils.zz + '} [GeV]' )
    ratio_axis.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
    syst_band = utils.re_style_bot( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h0' ) ) )
    # NOTE(review): this loop only reads points into throwaway locals — a
    # debugging leftover with no effect; safe to delete.
    for index in range( syst_band.GetN() ):
        X, Y = ROOT.Double( 0. ), ROOT.Double( 0. )
        syst_band.GetPoint( index, X, Y )
    syst_band.SetMarkerStyle( 1 )
    syst_band.SetFillColor( ROOT.kBlack )
    syst_band.SetFillStyle( 3345 )
    syst_band.GetXaxis().SetTitle( '#it{m}_{T}^{' + utils.zz + '} [GeV]' )
    syst_band.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
    ###
    # Top pad: draw once to create axes, re-style, then draw again
    c_top.cd()
    new_stack.SetMinimum( 0.002 )
    new_stack.Draw( 'hist' )
    new_stack = utils.re_style_top( new_stack )
    new_stack.Draw( 'hist' )
    new_stack.GetXaxis().SetTitle( '#it{m}_{T}^{' + utils.zz + '} [GeV]' )
    new_stack.GetYaxis().SetTitle( 'Events / %d GeV' % int( stack.GetXaxis().GetBinWidth( 1 ) ) )
    new_stack.SetMaximum( maxYtop )
    new_stack.GetXaxis().SetRangeUser( 250, 1500 )
    error.Draw( '2 same' )
    data.Draw( 'pe' )
    x_l1, y_l1, latex1 = utils.draw_latex( utils.lumi, True )
    latex1.DrawLatex( x_l1, y_l1 - 0.12, title + utils.void_char )
    latex1.DrawLatex( x_l1, y_l1 - 0.18, '' )
    leg = utils.create_legend( len( sample_list_r ), 0.3, 1 )
    leg.AddEntry( data, 'Data', 'pe' )
    for name, integral in sample_list_r:
        leg.AddEntry( hists[ name ], hists[ name ].GetTitle(), 'f' )
    leg.AddEntry( error, 'Uncertainty', 'f' )
    leg.Draw()
    # Per-channel pixel tweak for the (disabled) legend patch below
    mod_x, mod_y = 0, 0
    if cat == 'ee':
        mod_x, mod_y = 4, -2
    #utils.patch_bar( ( 228. + mod_x ) / 566., ( 233. + mod_x ) / 566., ( 324. + mod_y ) / 407., ( 324. + mod_y ) / 407., True )
    c_top.SetLogy()
    # Bottom pad: axis, systematic band, ratio points, off-scale arrows
    c_bottom.cd()
    ratio_axis.Draw( 'axis' )
    ratio_axis.GetXaxis().SetRangeUser( 250, 1500 )
    syst_band.Draw( 'e2 same' )
    ratio.Draw( 'pe0 same' )
    c_bottom.Update()
    for arrow in utils.get_arrows( ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(), ratio ):
        arrow.Draw()
    # Reference line at ratio == 1
    Line = ROOT.TLine( ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1 )
    Line.SetLineColor( ROOT.kBlack )
    Line.SetLineWidth( 2 )
    Line.SetLineStyle( 2 )
    Line.Draw()
    utils.save( canvas )
def make_plot(file_name,
              cat,
              title,
              minX=-999.,
              maxX=-1.,
              minY=0.02,
              maxY=1.98,
              isLog=1):
    """Draw the off-shell 2l2v data/MC plot with the SBI overlays.

    NOTE(review): this redefines make_plot again — only the last definition
    in the module is callable at runtime. Python 2 code (print statements,
    iteritems).

    file_name -- input file under 'input_offshell/'; its underscore-split
                 parts select the channel (part 1) and variable (parts 2-3)
    cat       -- category label drawn under the title
    title     -- title text drawn on the top pad
    minX/maxX -- X range; the sentinel defaults (-999./-1.) mean "take the
                 current pad range"
    minY/maxY -- Y range of the bottom (Data/Prediction) ratio pad
    isLog     -- 1 for log-scale top pad (also scales the Y maximum)
    """
    print "isLog:", isLog
    ###
    canvas, c_top, c_bottom = utils.create_double_pad(
        'plot_%s' % file_name.replace('.root', ''))
    ###
    root_file = ROOT.TFile('input_offshell/' + file_name)
    # e.g. 'X_<ch>_<val a>_<val b>_...' -> ch_file='<ch>', val_file='<val a>_<val b>'
    ch_file = file_name.split('_')[1]
    val_file = file_name.split('_')[2] + '_' + file_name.split('_')[3]
    stack = utils.re_style_top(
        root_file.Get('c1').GetPrimitive('pad1').GetPrimitive('hs1'))
    hists = {}
    merging_scheme = {}  #utils.complex_merging
    # Identify each component histogram by its original fill color
    for hist in stack.GetHists():
        prev_color = hist.GetFillColor()
        #print hist, prev_color
        sample_name = None
        for key, vals in utils.original_colors_2l2v.iteritems():
            #print key, vals
            if prev_color in vals:
                sample_name = key
        # Disabled per-sample restyling kept for reference:
        '''
        if not sample_name:
            'Sample Name Not Found'
        else:
            hist.SetFillColor( utils.get_colors( sample_name ) )
            #hist.SetLineColor( utils.colors[ sample_name ] )
            hist.SetLineColor( ROOT.kBlack )
            hist.SetLineWidth( 1 )
            hist.SetTitle( utils.titles[ sample_name ] )
        '''
        hists[sample_name] = hist
    ### get those no stack histograms in pad1
    data, error, sbi, sbi5 = None, None, None, None
    for element in root_file.Get('c1').GetPrimitive(
            'pad1').GetListOfPrimitives():
        #print element
        if element.GetName(
        ) == 'stat_sys_band_' + val_file + '_BJETVETO_' + ch_file:
            error = utils.re_style_top(element)
            error.SetMarkerStyle(1)
            error.SetFillColor(ROOT.kBlack)
            error.SetFillStyle(3345)
        elif element.GetName(
        ) == 'data_' + val_file + '_BJETVETO_' + ch_file + '_Nominal':
            data = utils.re_style_top(element)
        elif element.GetName(
        ) == 'SBI5_' + val_file + '_NLOIBJETVETO_' + ch_file + '_Nominal':
            # SBI with mu_off-shell = 5: drawn as a dashed line overlay
            sbi5 = utils.re_style_top(element)
            sbi5.SetMarkerColor(0)
            sbi5.SetMarkerStyle(0)
            sbi5.SetMarkerSize(0)
            sbi5.SetLineColor(616)
            sbi5.SetLineStyle(2)
        elif element.GetName(
        ) == 'SBI_' + val_file + '_NLOIBJETVETO_' + ch_file + '_Nominal':
            sbi = element
            prev_color = sbi.GetFillColor()
            #print sbi, prev_color
            sample_name = None
            for key, vals in utils.original_colors_2l2v.iteritems():
                if prev_color in vals:
                    sample_name = key
            # Subtract every stacked background from the SBI histogram,
            # then register the remainder under its color-matched name
            for k in hists:
                hist = hists[k]
                sbi.Add(hist, -1)
            hists[sample_name] = sbi
    ### Getting complicated here...
    # Merge samples per merging_scheme (currently empty, so this is a no-op)
    loop_over_this = hists.keys()
    to_be_removed = set()
    for key in loop_over_this:
        for merging_key, merging_list in merging_scheme.iteritems():
            if key in merging_list:
                to_be_removed.add(key)
                if merging_key in hists:
                    hists[merging_key].Add(hists[key])
                else:
                    hists[merging_key] = hists[key].Clone('another_clone_' +
                                                          hists[key].GetName())
    # Fixed stacking order (name, rank) instead of integral-based sorting
    to_be_used = [('ZZ', 3), ('Others', 1), ('WZ', 2), ('SBI', 4)]
    for k in hists:
        hist = hists[k]
        hist.SetFillColor(utils.get_colors(k))
        if k == 'SBI':
            hist.SetLineColor(ROOT.kBlue)
        else:
            hist.SetLineColor(ROOT.kBlack)
        hist.SetLineWidth(1)
        hist.SetTitle(utils.titles[k])
        # if not k in to_be_removed:
        #     penalty = 0
        #     to_be_used.append( ( k, hist.Integral() + penalty ) )
    print "to_be_used:", to_be_used
    # Stack smallest rank first; legend lists largest rank first
    sample_list = sorted(to_be_used, key=lambda x: x[1])
    sample_list_r = sorted(to_be_used, key=lambda x: x[1], reverse=True)
    new_stack = ROOT.THStack(stack.GetName() + '_clone', '')
    for name, integral in sample_list:
        #print name, integral
        new_stack.Add(hists[name])
    ###
    # Bottom pad inputs: data/MC ratio and its systematic band
    ratio = utils.re_style_bot(
        root_file.Get('c1').GetPrimitive(
            'pad2').GetPrimitive('ratio_bkg_data_' + val_file + '_BJETVETO_' +
                                 ch_file + '_Nominal'))
    syst_band = utils.re_style_bot(
        root_file.Get('c1').GetPrimitive('pad2').GetPrimitive(
            'ratio_sys_band_' + val_file + '_BJETVETO_' + ch_file), minY,
        maxY)
    syst_band.SetMarkerStyle(1)
    syst_band.SetFillColor(ROOT.kBlack)
    syst_band.SetFillStyle(3345)
    # X-axis title follows the plotted variable
    if val_file == 'met_tst':
        syst_band.GetXaxis().SetTitle(utils.met)
    elif val_file == 'mT_ZZ':
        syst_band.GetXaxis().SetTitle(utils.mTZZ)
    syst_band.GetYaxis().SetTitle('#frac{Data}{Prediction}')
    ###
    # Sentinel defaults mean: use the current pad's X range
    if minX == -999. or maxX == -1.:
        minX, maxX = ROOT.gPad.GetUxmin(), ROOT.gPad.GetUxmax()
    print "minX =", minX, "maxX =", maxX
    # Top pad: draw once to create axes, re-style, then draw again
    c_top.cd()
    new_stack.SetMinimum(0.003)
    new_stack.Draw('hist')
    new_stack = utils.re_style_top(new_stack)
    new_stack.Draw('hist')
    new_stack.GetXaxis().SetRangeUser(minX, maxX)
    #new_stack.GetXaxis().SetTitle( utils.m4l )
    new_stack.GetYaxis().SetTitle('Events / %d GeV' %
                                  stack.GetXaxis().GetBinWidth(1))
    #new_stack.GetYaxis().SetTitle( 'Events / %s' % stack.GetXaxis().GetBinWidth( 1 ) )
    # Larger headroom on log scale so the legend fits
    if isLog == 1:
        Yscale = 1000
    else:
        Yscale = 1.6
    new_stack.SetMaximum(data.GetMaximum() * Yscale)
    error.Draw('e2 same')
    sbi5.Draw('hist same')
    data.Draw('p same')
    x_l1, y_l1, latex1 = utils.draw_latex(utils.lumi, True)
    latex1.DrawLatex(x_l1, y_l1 - 0.12, title + utils.void_char)
    latex1.DrawLatex(x_l1, y_l1 - 0.18, cat + utils.void_char)
    leg = utils.create_legend(len(sample_list_r), 0.3, 1)
    leg.AddEntry(data, 'Data', 'pe')
    leg.AddEntry(sbi5,
                 'gg+VBF#rightarrow(H*#rightarrow)ZZ(#mu_{off-shell}=5)',
                 'l')
    for name, integral in sample_list_r:
        leg.AddEntry(hists[name], hists[name].GetTitle(), 'f')
    leg.AddEntry(error, 'Uncertainty', 'f')
    #leg.AddEntry( None, '', '' )
    #leg.AddEntry( None, '#color[0]{' + utils.titles[ 'emu' ] + '}', '' )
    leg.Draw()
    c_top.Update()
    if isLog == 1:
        c_top.SetLogy()
    # Bottom pad: systematic band and ratio points
    c_bottom.cd()
    syst_band.Draw('e2')
    syst_band.GetXaxis().SetRangeUser(minX, maxX)
    ratio.Draw('pe0 same')
    c_bottom.Update()
    #for arrow in utils.get_arrows( ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(), ratio ): arrow.Draw()
    # Reference line at ratio == 1
    Line = ROOT.TLine(ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1)
    Line.SetLineColor(ROOT.kBlack)
    Line.SetLineWidth(2)
    Line.SetLineStyle(2)
    Line.Draw()
    print "isLog:", isLog
    utils.save(canvas, isLog)
def create_covid_viz():
    """Build a multi-layer COVID-19 world choropleth (confirmed, deaths,
    active, incident rate, case-fatality ratio) and save it as
    'COVID-19_viz.html' in the webapp templates folder."""
    # --- Load and pre-process the geojson file ---
    world_geojson = gpd.read_file(geojson_path)
    world_geojson.drop(columns=['ISO_A2'], inplace=True)

    # --- Load and pre-process the COVID-19 data ---
    df_covid = pd.read_csv(
        os.path.join(data_dir_path, 'covid_' + newest_dataset))
    timestamp = df_covid['Last_Update'][0]

    # Align country names between the CSV and the geojson
    df_covid.replace(to_replace={'Country_Region': 'US'},
                     value='United States of America',
                     inplace=True)
    df_covid.replace(to_replace={'Country_Region': 'Bahamas'},
                     value='The Bahamas',
                     inplace=True)
    df_covid.replace(to_replace={'Country_Region': 'Congo (Brazzaville)'},
                     value='Republic of Congo',
                     inplace=True)
    df_covid.replace(to_replace={'Country_Region': 'Congo (Kinshasa)'},
                     value='Democratic Republic of the Congo',
                     inplace=True)
    df_covid.replace(to_replace={'Country_Region': 'Taiwan*'},
                     value='Taiwan',
                     inplace=True)
    df_covid.replace(to_replace={'Country_Region': "Cote d'Ivoire"},
                     value='Ivory Coast',
                     inplace=True)
    df_covid.replace(to_replace={'Country_Region': "Czechia"},
                     value='Czech Republic',
                     inplace=True)
    world_geojson.replace(to_replace={'ADMIN': 'Macedonia'},
                          value='North Macedonia',
                          inplace=True)
    # Change the name of 'ADMIN' column in the geojson DF to match the
    # one in the COVID DF
    world_geojson.rename(columns={'ADMIN': 'Country_Region'}, inplace=True)

    # Aggregate the data for countries that have regional information
    df_covid_agg = df_covid.groupby('Country_Region').agg({
        'Confirmed': 'sum',
        'Deaths': 'sum',
        'Recovered': 'sum',
        'Active': 'sum',
        'Incident_Rate': 'mean',
        'Case_Fatality_Ratio': 'mean'
    })
    world_geojson = world_geojson.sort_values('Country_Region').reset_index(
        drop=True)

    # Join the geojson with the DataFrame ('right' keeps every geojson row)
    df_covid_joined = df_covid_agg.merge(world_geojson,
                                         how='right',
                                         on='Country_Region')

    # Min and max per data column (taken before the NaN fill below)
    min_dict, max_dict = {}, {}
    column_names = [
        'Confirmed', 'Deaths', 'Active', 'Incident_Rate',
        'Case_Fatality_Ratio'
    ]
    for name in column_names:
        min_dict[name] = min(df_covid_joined[name])
        max_dict[name] = max(df_covid_joined[name])

    # Replace NaNs (countries with no data) in the DataFrame with '-1'
    df_covid_joined.fillna(-1, inplace=True)

    # Add the data columns to the geojson for future popup displaying
    world_geojson = world_geojson.assign(
        Confirmed=df_covid_joined['Confirmed'],
        Deaths=df_covid_joined['Deaths'],
        Active=df_covid_joined['Active'],
        Incident_Rate=df_covid_joined['Incident_Rate'],
        Case_Fatality_Ratio=df_covid_joined['Case_Fatality_Ratio'])
    print(world_geojson)

    # Set the correct index columns
    df_covid_joined.set_index('Country_Region', inplace=True)

    # Per-column step colormaps over geometrically spaced bins, with a
    # dedicated '-1' bin for missing data
    colormap_dict = {}
    bins = []
    for name in column_names:
        # Work-around for geometric space not accepting values below 1
        tmp_min = min_dict[name]
        if min_dict[name] < 1:
            min_dict[name] = 1
        inner_bins = np.geomspace(start=min_dict[name],
                                  stop=max_dict[name],
                                  num=10)
        min_dict[name] = tmp_min
        # Swap the first edge for the true minimum, then prepend the
        # 'No data' edge
        inner_bins = np.delete(inner_bins, 0)
        inner_bins = np.insert(inner_bins, 0, min_dict[name])
        inner_bins = np.insert(inner_bins, 0, -1.)
        inner_bins = inner_bins.tolist()
        # Round bin edges: counts to ints, rates/ratios to 2 decimals.
        # FIX: loop variable renamed so the builtin 'bin' is not shadowed.
        if name in ['Confirmed', 'Deaths', 'Active']:
            inner_bins = [int(round(edge, 0)) for edge in inner_bins]
        else:
            inner_bins = [round(edge, 2) for edge in inner_bins]
        bins.append(inner_bins)
        colormap_dict[name] = cm.StepColormap(colors=color_dict[name],
                                              index=inner_bins,
                                              vmin=min_dict[name],
                                              vmax=max_dict[name])
        # Map values straight through the bound method (the lambda wrapper
        # around rgb_hex_str was redundant)
        df_covid_joined[name + '_color'] = df_covid_joined[name].map(
            colormap_dict[name].rgb_hex_str)

    # --- Initialize the map ---
    map_covid = folium.Map(location=[0, 0],
                           zoom_start=4,
                           max_bounds=True,
                           tiles=None)
    base_map = folium.FeatureGroup(name='Basemap',
                                   overlay=True,
                                   control=False)
    folium.TileLayer(min_zoom=3, tiles='OpenStreetMap').add_to(base_map)
    base_map.add_to(map_covid)

    # --- Create the content of the map ---
    # One FeatureGroup per data category (radio-selectable, not overlaid).
    # FIX: iterate keys directly — values were unused.
    feature_groups = []
    for category in color_dict:
        group = folium.FeatureGroup(category, overlay=False)
        feature_groups.append(group)

    # Create the choropleths, one per category, each with its popup
    choropleth_confirmed = folium.GeoJson(
        data=world_geojson,
        zoom_on_click=False,
        name='Confirmed Cases',
        style_function=lambda x: {
            'fillColor':
            df_covid_joined['Confirmed_color'][x['properties'][
                'Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 0.5
        }).add_to(feature_groups[0])
    popup_confirmed = folium.GeoJsonPopup(
        fields=['Country_Region', 'Confirmed'], labels=False)
    popup_confirmed.add_to(choropleth_confirmed)
    choropleth_deaths = folium.GeoJson(
        data=world_geojson,
        name='Deaths',
        style_function=lambda x: {
            'fillColor':
            df_covid_joined['Deaths_color'][x['properties']['Country_Region']
                                            ],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[1])
    popup_deaths = folium.GeoJsonPopup(fields=['Country_Region', 'Deaths'],
                                       labels=False)
    popup_deaths.add_to(choropleth_deaths)
    choropleth_active = folium.GeoJson(
        data=world_geojson,
        name='Active Cases',
        style_function=lambda x: {
            'fillColor':
            df_covid_joined['Active_color'][x['properties']['Country_Region']
                                            ],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[2])
    popup_active = folium.GeoJsonPopup(fields=['Country_Region', 'Active'],
                                       labels=False)
    popup_active.add_to(choropleth_active)
    choropleth_incident_rate = folium.GeoJson(
        data=world_geojson,
        name='Incident Rate',
        style_function=lambda x: {
            'fillColor':
            df_covid_joined['Incident_Rate_color'][x['properties'][
                'Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[3])
    popup_incident_rate = folium.GeoJsonPopup(
        fields=['Country_Region', 'Incident_Rate'], labels=False)
    popup_incident_rate.add_to(choropleth_incident_rate)
    choropleth_case_fatality_ratio = folium.GeoJson(
        data=world_geojson,
        name='Case Fatality Ratio',
        style_function=lambda x: {
            'fillColor':
            df_covid_joined['Case_Fatality_Ratio_color'][x['properties'][
                'Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[4])
    popup_case_fatality_ratio = folium.GeoJsonPopup(
        fields=['Country_Region', 'Case_Fatality_Ratio'], labels=False)
    popup_case_fatality_ratio.add_to(choropleth_case_fatality_ratio)

    # Map legend templates: first color = 'No data', last = open-ended
    # top bin, every other color a closed interval between bin edges
    legend_str_dict = {}
    for i, (k, v) in enumerate(color_dict.items()):
        legend_labels_dict = {}
        for j, color in enumerate(v):
            if j == 0:
                legend_labels_dict[color] = 'No data'
            elif j == len(v) - 1:
                legend_labels_dict[color] = '> ' + str(bins[i][j])
                break
            else:
                legend_labels_dict[color] = str(bins[i][j]) + ' - ' + str(
                    bins[i][j + 1])
        legend_str_dict[k] = legend_labels_dict
    template = utils.create_legend(caption='COVID-19 status as of: ' +
                                   str(timestamp) + ' UTC',
                                   legend_labels=legend_str_dict)
    macro = MacroElement()
    macro._template = Template(template)
    map_covid.get_root().add_child(macro)

    for feature_group in feature_groups:
        feature_group.add_to(map_covid)
    # Activate Layer Control
    folium.LayerControl(collapsed=True).add_to(map_covid)

    # --- Save completed map viz to an appropriate folder ---
    map_covid.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'COVID-19_viz.html'))
    print('Successfully created the COVID-19 viz!')
def create_uk_accidents_viz():
    """Build a time-animated heatmap of 2015 UK traffic accidents and save
    it as 'UK_accidents_viz.html' in the webapp templates folder."""
    # --- Load and pre-process the UK accidents data ---
    df_accidents_path = os.path.normpath(
        os.path.join(script_dir_path, '..', 'data', 'Accidents1115.csv'))
    fields = [
        'Accident_Index', 'Latitude', 'Longitude', 'Date', 'Accident_Severity'
    ]
    df_accidents = pd.read_csv(df_accidents_path,
                               index_col='Accident_Index',
                               usecols=fields)

    # Format and sort by date
    df_accidents['Date'] = pd.to_datetime(df_accidents['Date'],
                                          format='%Y-%m-%d',
                                          errors='raise')
    df_accidents.sort_values('Date', inplace=True)
    # Drop the rows in which there's no lat lon data
    df_accidents = df_accidents[df_accidents['Latitude'].notna()]
    # NOTE(review): this writes the cleaned data back over the source CSV —
    # presumably a deliberate one-off normalization (it is also what makes
    # the '%Y-%m-%d' parse above succeed on later runs); confirm before
    # removing.
    df_accidents.to_csv(df_accidents_path)
    # Leave only the 2015 accidents
    df_accidents = df_accidents[df_accidents['Date'].dt.year == 2015]

    # One heatmap frame per day, labelled 'YYYY-MM-DD Weekday'.
    # FIX: the original compared the datetime 'Date' column against the
    # formatted label string (e.g. '2015-01-05 Monday'), relying on pandas
    # implicitly re-parsing a weekday-suffixed label; comparing formatted
    # labels to formatted labels selects the same rows unambiguously.
    date_labels = df_accidents['Date'].dt.strftime('%Y-%m-%d %A')
    heatmap_time_dates = date_labels.unique().tolist()
    heatmap_time_data = []
    for label in heatmap_time_dates:
        df_accidents_daily = df_accidents.loc[date_labels == label]
        heatmap_time_data.append(
            df_accidents_daily[['Latitude', 'Longitude']].to_numpy().tolist())

    # --- Initialize the map, bounded around Great Britain ---
    map_accidents = folium.Map(location=[54, -2.4220],
                               zoom_start=6,
                               max_bounds=True,
                               min_zoom=3,
                               max_lat=60,
                               max_lon=5,
                               min_lat=49,
                               min_lon=-12)

    # --- Create the map content and add it to the map object ---
    heatmap = HeatMapWithTime(
        heatmap_time_data,
        index=heatmap_time_dates,
        name='Traffic accidents in Great Britain (2015)',
        gradient={
            .8: 'blue',
            .95: 'lime',
            .998: 'orange',
            1: 'red'
        },
        use_local_extrema=False,
        min_opacity=0,
        max_opacity=0.7,
        scale_radius=False)
    heatmap.add_to(map_accidents)

    # Create the legend
    template = utils.create_legend(caption='UK traffic accidents in 2015')
    macro = MacroElement()
    macro._template = Template(template)
    map_accidents.get_root().add_child(macro)

    # --- Save completed map viz to an appropriate folder ---
    map_accidents.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'UK_accidents_viz.html'))
    print('Successfully created the UK accidents viz!')