Example #1
def create_gdp_viz():
    '''
    Load and pre-process the geojson file
    '''
    geojson_path = os.path.normpath(
        os.path.join(script_dir_path, '..', 'data', 'borders_geo.json'))
    world_geojson = gpd.read_file(geojson_path)
    world_geojson.drop(columns=['ISO_A2', 'ADMIN'], inplace=True)
    world_geojson.drop(world_geojson[world_geojson['ISO_A3'] == '-99'].index,
                       inplace=True)
    country_list = world_geojson['ISO_A3'].tolist()
    '''
    Load and pre-process the GDP data
    '''
    # Load the GDP data
    df_GDP_path = os.path.normpath(
        os.path.join(script_dir_path, '..', 'data',
                     'GDP_per_capita_world_data.csv'))
    df_GDP = pd.read_csv(df_GDP_path, index_col='Country Code', skiprows=4)

    # Drop unnecessary data
    df_GDP.drop(labels='2020', axis=1, inplace=True)

    csv_country_list = df_GDP.index.tolist()
    country_list = list(set(country_list).intersection(csv_country_list))
    df_GDP.drop(df_GDP[~df_GDP.index.isin(country_list)].index, inplace=True)
    world_geojson.drop(
        world_geojson[~world_geojson['ISO_A3'].isin(country_list)].index,
        inplace=True)
    country_list.sort()

    # Create an enumerated country dict for id mapping
    country_dict = {k: v for v, k in enumerate(country_list)}
    world_geojson['country_id'] = world_geojson['ISO_A3'].map(country_dict)

    # Find the overall min and max GDP values
    data_cols = df_GDP.columns[4:]
    min_GDP_val = df_GDP[data_cols].min().min()
    max_GDP_val = df_GDP[data_cols].max().max()

    # Create a color list
    color_list = [
        '#808080', '#A50026', '#D73027', '#F46D43', '#FDAE61', '#FEE08B',
        '#FFFFBF', '#D9EF8B', '#A6D96A', '#66BD63', '#1A9850', '#006837'
    ]

    # Create a list of geometrically spaced numbers over a min-max interval
    bins = np.geomspace(min_GDP_val, max_GDP_val, 12)

    # Replace NaNs (records with no data available) with '-1'
    df_GDP.fillna(-1, inplace=True)

    # Add NaN category to the bins
    bins = np.insert(bins, 0, -1.)
    bins = bins.tolist()

    # Append 'color_[year]' columns to the GDP DataFrame
    for year in range(1960, 2020):
        pasted_col_id = df_GDP.columns.get_loc(str(year)) + 1
        col_value = pd.cut(df_GDP[str(year)],
                           bins,
                           include_lowest=True,
                           labels=color_list)
        df_GDP.insert(loc=pasted_col_id,
                      column='color_' + str(year),
                      value=col_value)

    print(df_GDP)
    '''
    Create appropriately formatted dictionary that the TimeSliderChoropleth will receive as an input
    '''
    gdp_dict = {}
    for country_code in df_GDP.index.tolist():
        country_id = str(country_dict[country_code])
        gdp_dict[country_id] = {}
        for year in range(1960, 2020):
            dt_obj = datetime(year=year, month=12, day=31)
            # time.mktime returns a Unix timestamp in seconds
            year_in_s = str(time.mktime(dt_obj.timetuple()))
            color_hex = df_GDP.at[country_code, 'color_' + str(year)]
            gdp_dict[country_id][year_in_s] = {
                'color': color_hex,
                'opacity': 0.7
            }
    ''' 
    Initialize the map
    '''
    map_GDP = folium.Map(location=[0, 0],
                         zoom_start=4,
                         max_bounds=True,
                         min_zoom=3)
    '''
    Create the map content and add it to the map object
    '''
    # Create the choropleth
    choropleth = TimeSliderChoropleth(
        world_geojson.set_index('country_id').to_json(), styledict=gdp_dict)
    choropleth.add_to(map_GDP)

    # Create the map legend
    legend_labels_dict = {}
    for i, color in enumerate(color_list):
        if i == 0:
            legend_labels_dict[color] = 'No data'
        elif i == len(color_list) - 1:
            legend_labels_dict[color] = '> ' + str(round(bins[i], 2)) + '$'
        else:
            legend_labels_dict[color] = str(round(bins[i], 2)) + '$' + ' - ' + \
                str(round(bins[i + 1], 2)) + '$'

    template = utils.create_legend(caption='GDP per capita in USD',
                                   legend_labels=legend_labels_dict)
    macro = MacroElement()
    macro._template = Template(template)
    map_GDP.get_root().add_child(macro)
    '''
    Save completed map viz to an appropriate folder
    '''
    map_GDP.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'GDP_viz.html'))
    print('Successfully created the GDP viz!')
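
# The folium examples reference module-level imports and paths that are not part
# of the snippets themselves. A minimal sketch of the setup Example #1 appears to
# assume (the exact layout in the original project may differ; 'utils' is a
# project-local module providing create_legend and similar helpers). Later folium
# examples additionally use MarkerCluster, HeatMapWithTime, timedelta and a
# data_dir_path defined alongside these names.
import os
import time
from datetime import datetime

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
from branca.element import MacroElement, Template
from folium.plugins import TimeSliderChoropleth

import utils  # project-local helpers, not shown in these snippets

# Assumption: data/template paths are resolved relative to this script's directory
script_dir_path = os.path.dirname(os.path.abspath(__file__))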
Example #2
def create_sf_crime_viz():
    '''
    Load and pre-process the San Francisco crime data
    '''
    # Load the crime data
    df_crime = pd.read_csv(
        os.path.join(data_dir_path, 'last_week_SF_crimes.csv'))

    # Drop the rows in which there's no lat lon data
    df_crime = df_crime[df_crime['latitude'].notna()]
    df_crime = df_crime[df_crime['longitude'].notna()]

    # Create popups and their contents
    popups_list, locations_list = [], []
    for _, row in df_crime.iterrows():
        # Trim unnecessary information from the timestamp
        incident_timestamp = row['incident_datetime']
        incident_timestamp = incident_timestamp.replace('T', ' ')
        incident_timestamp = incident_timestamp[:-7]

        # Create a popup object and append it to the popups array
        popup_content = '<strong>Timestamp: </strong>' + incident_timestamp + '<br>' \
                        + '<strong>Day of the week: </strong>' + row['incident_day_of_week'] + '<br>' \
                        + '<strong>Description: </strong>' + row['incident_description']
        popups_list.append(folium.Popup(html=popup_content))

        # Get the lat, lon location data and add it to the list
        locations_list.append(row[['latitude',
                                   'longitude']].to_numpy().tolist())
    ''' 
    Initialize the map
    '''
    map_crime = folium.Map(location=[37.773972, -122.431297],
                           zoom_start=11,
                           max_bounds=True,
                           min_zoom=9,
                           max_lat=38.5,
                           max_lon=-122,
                           min_lat=37,
                           min_lon=-123)
    '''
    Create the map content and add it to the map object
    '''
    # Create marker cluster
    icon_list = []
    for _ in range(len(locations_list)):
        icon_list.append(
            folium.Icon(icon='exclamation', prefix='fa', color='orange'))

    marker_cluster = MarkerCluster(locations=locations_list,
                                   popups=popups_list,
                                   icons=icon_list)
    marker_cluster.add_to(map_crime)
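
    # An equivalent, per-marker way to build the same cluster (sketch only; the
    # bulk constructor above is what this snippet actually uses):
    #   marker_cluster = MarkerCluster()
    #   for location, popup, icon in zip(locations_list, popups_list, icon_list):
    #       folium.Marker(location=location, popup=popup, icon=icon).add_to(marker_cluster)
    #   marker_cluster.add_to(map_crime)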

    # Create map legend
    current_timestamp = datetime.now() - timedelta(days=1)
    week_before = current_timestamp - timedelta(weeks=1)
    current_timestamp = current_timestamp.strftime('%Y-%m-%d')
    week_before = week_before.strftime('%Y-%m-%d')

    template = utils.create_legend(caption='San Francisco crimes between ' +
                                   week_before + ' and ' + current_timestamp)
    macro = MacroElement()
    macro._template = Template(template)
    map_crime.get_root().add_child(macro)
    '''
    Save completed map viz to an appropriate folder
    '''
    map_crime.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'SF_crime_viz.html'))
    print('Successfully created the San Francisco crime viz!')
Example #3
def main():

    ### Main definitions
    ROOT.gROOT.SetBatch()
    ###
    canvas, c_top, c_bottom = utils.create_double_pad('plot_3lcr_2l2v')
    ###
    root_file = ROOT.TFile('inputs/MTW_all_met_tst.root')
    stack = utils.re_style_top(
        root_file.Get('c1').GetPrimitive('pad1').GetPrimitive('hs1'))
    stack.GetXaxis().SetTitle('#it{E}_{T}^{miss} [GeV]')
    stack.GetYaxis().SetTitle('Events / 30 GeV')
    hists = {}
    merging_scheme = utils.complex_merging
    full_stack = stack.GetStack().Last()
    for hist in stack.GetHists():
        prev_color = hist.GetFillColor()
        sample_name = None
        for key, vals in utils.original_colors_2l2v.iteritems():
            if prev_color in vals:
                sample_name = key
        if not sample_name:
            print 'Sample Name Not Found'
        else:
            '''
      for merging_key, merging_list in merging_scheme.iteritems():
        if sample_name in merging_list:
          print sample_name, ' is supposed to be merged into ', merging_key
      '''
        hists[sample_name] = utils.re_shape_tail(hist)
        '''
    a = hist.GetXaxis()
    print
    print a.GetNbins()
    print a.GetXmin()
    print a.GetXmax()
    '''
    ### Getting complicated here...
    loop_over_this = hists.keys()
    to_be_removed = utils.wjets_removal
    for key in loop_over_this:
        for merging_key, merging_list in merging_scheme.iteritems():
            if key in merging_list:
                to_be_removed.add(key)
                if merging_key in hists:
                    hists[merging_key].Add(hists[key])
                else:
                    hists[merging_key] = hists[key].Clone('another_clone_' +
                                                          hists[key].GetName())
    to_be_used = []
    for k in hists:
        hist = hists[k]
        hist.SetFillColor(utils.get_colors(k))
        hist.SetLineColor(ROOT.kBlack)
        hist.SetLineWidth(1)
        hist.SetTitle(utils.titles[k])
        if not k in to_be_removed:
            to_be_used.append((k, hist.Integral()))
    sample_list = sorted(to_be_used, key=lambda x: x[1])
    sample_list_r = sorted(to_be_used, key=lambda x: x[1], reverse=True)
    new_stack = ROOT.THStack(stack.GetName() + '_clone', '')
    for name, integral in sample_list:
        new_stack.Add(hists[name])
    ###
    data = utils.re_style_top(
        utils.th1_to_tgraph(
            root_file.Get('c1').GetPrimitive('pad1').GetPrimitive(
                'met_tst_MTW_all_Nominal'), True))
    prev_error, last_bin_ratio = utils.re_shape_tail(
        root_file.Get('c1').GetPrimitive('pad1').GetPrimitive('h0'), True)
    error = utils.re_style_top(utils.th1_to_tgraph(prev_error))
    error.SetMarkerStyle(1)
    error.SetFillColor(ROOT.kBlack)
    error.SetFillStyle(3345)
    ###
    ratio_axis = utils.re_style_bot(
        root_file.Get('c1').GetPrimitive('pad2').GetPrimitive('h3'))
    ratio = utils.re_style_bot(
        utils.th1_to_tgraph(
            root_file.Get('c1').GetPrimitive('pad2').GetPrimitive('h3'), True))
    ratio_axis.GetXaxis().SetTitle('#it{E}_{T}^{miss} [GeV]')
    ratio_axis.GetYaxis().SetTitle('#frac{Data}{Prediction}')
    syst_band = utils.re_style_bot(
        utils.th1_to_tgraph(
            utils.change_last_bin(
                root_file.Get('c1').GetPrimitive('pad2').GetPrimitive('h0'),
                last_bin_ratio)))
    #ratio = utils.re_style_bot( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h3' ) )
    #ratio.GetXaxis().SetTitle( '#it{E}_{T}^{miss} [GeV]' )
    #ratio.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
    ##syst_band = utils.re_style_bot( utils.re_shape_tail( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h0' ), full_stack ) )
    #syst_band = utils.re_style_bot( utils.change_last_bin( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h0' ), last_bin_ratio ) )
    syst_band.SetMarkerStyle(1)
    syst_band.SetFillColor(ROOT.kBlack)
    syst_band.SetFillStyle(3345)
    ###
    c_top.cd()
    new_stack.SetMaximum(1000000)
    new_stack.SetMinimum(0.001)
    new_stack.Draw('hist')
    new_stack = utils.re_style_top(new_stack)
    new_stack.Draw('hist')
    new_stack.GetXaxis().SetTitle('#it{E}_{T}^{miss} [GeV]')
    new_stack.GetYaxis().SetTitle('Events / 30 GeV')
    new_stack.GetXaxis().SetRangeUser(0, 600)
    error.Draw('e2 same')
    data.Draw('pe')
    x_l1, y_l1, latex1 = utils.draw_latex(utils.lumi, True)
    latex1.DrawLatex(
        x_l1, y_l1 - 0.12, utils.h_to_zz_to + utils.lepp + utils.lepm +
        utils.nu + utils.nubar + utils.void_char + utils.inv_prime)
    latex1.DrawLatex(x_l1, y_l1 - 0.18, '3#it{l} Control Region')
    leg = utils.create_legend(len(sample_list_r), 0.3, 1)
    leg.AddEntry(data, 'Data', 'pe')
    for name, integral in sample_list_r:
        leg.AddEntry(hists[name], hists[name].GetTitle(), 'f')
    leg.AddEntry(error, 'Uncertainty', 'f')
    leg.Draw()
    #utils.patch_bar( 229. / 566., 234. / 566., 322. / 407., 322. / 407., True )
    c_top.SetLogy()
    c_bottom.cd()
    ratio_axis.Draw('axis')
    ratio_axis.GetXaxis().SetRangeUser(0, 600)
    syst_band.Draw('e2 same')
    ratio.Draw('pe0 same')
    c_bottom.Update()
    for arrow in utils.get_arrows(ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(),
                                  ratio):
        arrow.Draw()
    Line = ROOT.TLine(ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1)
    Line.SetLineColor(ROOT.kBlack)
    Line.SetLineWidth(2)
    Line.SetLineStyle(2)
    Line.Draw()
    #canvas.SaveAs( canvas.GetName() + '.pdf' )
    utils.save(canvas)
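
# Examples #3-#6 are PyROOT scripts written for Python 2 (dict.iteritems(),
# print statements) and lean on a project-local 'utils' module that provides
# create_double_pad, re_style_top, re_style_bot, th1_to_tgraph, re_shape_tail,
# create_legend, draw_latex, save, the colour tables and the LaTeX labels.
# A minimal sketch of the preamble they presumably share:
import ROOT
import utils

if __name__ == '__main__':
    main()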
Example #4
def make_plot( file_name, cat, title, maxYtop, minY = 0.02, maxY = 1.98 ):
#def make_plot( file_name, cat, title, maxYtop, minY = 0.18, maxY = 1.82 ):
#def make_plot( file_name, cat, title, maxYtop, minY = 0., maxY = 3.4 ):
  ###
  canvas, c_top, c_bottom = utils.create_double_pad( 'plot_%s' % file_name.replace( '.root', '' ) )
  ###
  root_file = ROOT.TFile( 'inputs/' + file_name )
  stack = utils.re_style_top( root_file.Get( 'ratio' ).GetPrimitive( 'p1_ratio' ).GetPrimitive( 'combined' ) )
  hists = {}
  merging_scheme = {} #utils.complex_merging
  for hist in stack.GetHists():
    prev_color = hist.GetFillColor()
    sample_name = None
    for key, vals in utils.original_colors_4l.iteritems():
      if prev_color in vals:
        sample_name = key
    '''
    if not sample_name:
      'Sample Name Not Found'
    else:
      hist.SetFillColor( utils.get_colors( sample_name ) ) 
      #hist.SetLineColor( utils.colors[ sample_name ] ) 
      hist.SetLineColor( ROOT.kBlack ) 
      hist.SetLineWidth( 1 ) 
      hist.SetTitle( utils.titles[ sample_name ] ) 
    '''
    hists[ sample_name ] = hist
  ### Getting complicated here...
  loop_over_this = hists.keys()
  to_be_removed = set()
  for key in loop_over_this:
    for merging_key, merging_list in merging_scheme.iteritems():
      if key in merging_list:
        to_be_removed.add( key )
        if merging_key in hists:
          hists[ merging_key ].Add( hists[ key ] ) 
        else:
          hists[ merging_key ] = hists[ key ].Clone( 'another_clone_' + hists[ key ].GetName() )
  to_be_used = []
  for k in hists:
    hist = hists[ k ]
    hist.SetFillColor( utils.get_colors( k ) ) 
    hist.SetLineColor( ROOT.kBlack ) 
    hist.SetLineWidth( 1 ) 
    hist.SetTitle( utils.titles[ k ] ) 
    if not k in to_be_removed:
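      # The penalty only nudges the sort key used below for the stack/legend
      # ordering; the histogram contents themselves are unchanged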
      penalty = 0
      if k == 'Red':
        penalty = -1
      to_be_used.append( ( k, hist.Integral() + penalty ) )
  sample_list = sorted( to_be_used,  key = lambda x: x[ 1 ] )
  sample_list_r = sorted( to_be_used,  key = lambda x: x[ 1 ], reverse = True )
  new_stack = ROOT.THStack( stack.GetName() + '_clone', '' )
  for name, integral in sample_list:
    new_stack.Add( hists[ name ] )
  data, error = None, None
  for element in root_file.Get( 'ratio' ).GetPrimitive( 'p1_ratio' ).GetListOfPrimitives():
    if element.GetName() == 'Graph':
      if not utils.is_data( element ):
        error = utils.re_style_top( element ) 
        error.SetMarkerStyle( 1 )
        error.SetFillColor( ROOT.kBlack )
        error.SetFillStyle( 3345 )
      else:
        data = utils.re_style_top( element ) 
  ###
  ratio = utils.re_style_bot( root_file.Get( 'ratio' ).GetPrimitive( 'p2_ratio' ).GetPrimitive( 'dataRatio' ) )
  syst_band = utils.re_style_bot( root_file.Get( 'ratio' ).GetPrimitive( 'p2_ratio' ).GetPrimitive( 'sysGraphRatio' ), minY, maxY )
  syst_band.SetMarkerStyle( 1 )
  syst_band.SetFillColor( ROOT.kBlack )
  syst_band.SetFillStyle( 3345 )
  syst_band.GetXaxis().SetTitle( utils.m4l ) #'#it{m}_{4l} [GeV]' )
  syst_band.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
  ###
  c_top.cd()
  new_stack.SetMinimum( 0.003 ) 
  new_stack.Draw( 'hist' )
  new_stack = utils.re_style_top( new_stack ) 
  new_stack.Draw( 'hist' )
  #new_stack.GetXaxis().SetRangeUser( 250, 1500 )
  new_stack.GetXaxis().SetTitle( utils.m4l ) #'#it{m}_{4l} [GeV]' )
  new_stack.GetYaxis().SetTitle( 'Events / %d GeV' % int( stack.GetXaxis().GetBinWidth( 1 ) ) )
  new_stack.SetMaximum( maxYtop )
  error.Draw( 'e2' )
  data.Draw( 'p same' )
  x_l1, y_l1, latex1 = utils.draw_latex( utils.lumi, True )
  latex1.DrawLatex( x_l1, y_l1 - 0.12, title + utils.void_char )
  latex1.DrawLatex( x_l1, y_l1 - 0.18, cat + utils.void_char )
  leg = utils.create_legend( len( sample_list_r ), 0.3, 1 )
  leg.AddEntry( data, 'Data', 'pe' ) 
  for name, integral in sample_list_r: 
    leg.AddEntry( hists[ name ], hists[ name ].GetTitle(), 'f' ) 
  leg.AddEntry( error, 'Uncertainty', 'f' ) 
  #leg.AddEntry( None, '', '' ) 
  #leg.AddEntry( None, '#color[0]{' + utils.titles[ 'emu' ] + '}', '' ) 
  leg.Draw()
  c_top.Update()
  minX, maxX = ROOT.gPad.GetUxmin(), ROOT.gPad.GetUxmax()
  c_top.SetLogy()
  c_bottom.cd()
  syst_band.Draw( 'ae2' )
  syst_band.GetXaxis().SetRangeUser( minX, maxX )
  ratio.Draw( 'pe0 same' )
  c_bottom.Update()
  for arrow in utils.get_arrows( ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(), ratio ):
    arrow.Draw()
  Line = ROOT.TLine( ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1 )
  Line.SetLineColor( ROOT.kBlack )
  Line.SetLineWidth( 2 )
  Line.SetLineStyle( 2 )
  Line.Draw()
  utils.save( canvas )
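
# A hypothetical call site (the file name and labels are illustrative only; the
# real driver is not part of this snippet):
#   make_plot( 'm4l_inclusive.root', 'Inclusive', 'H#rightarrowZZ#rightarrow4#it{l}', 5e5 )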
Example #5
def make_plot( file_name, cat, title, maxYtop, minY = 0.18, maxY = 1.82 ):
  ###
  canvas, c_top, c_bottom = utils.create_double_pad( 'plot_sr_' + cat + 'vv' )
  ###
  root_file = ROOT.TFile( 'inputs/' + file_name )
  stack = utils.re_style_top( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'hs1' ) )
  hists = {} 
  merging_scheme = utils.complex_merging
  for hist in stack.GetHists():
    prev_color = hist.GetFillColor()
    sample_name = None
    for key, vals in utils.original_colors_2l2v.iteritems():
      if prev_color in vals:
        sample_name = key
    '''
    if not sample_name:
      'Sample Name Not Found'
    else:
      hist.SetFillColor( utils.get_colors( sample_name ) ) 
      #hist.SetLineColor( utils.colors[ sample_name ] ) 
      hist.SetLineColor( ROOT.kBlack ) 
      hist.SetLineWidth( 1 ) 
      hist.SetTitle( utils.titles[ sample_name ] ) 
    '''
    hists[ sample_name ] = hist
  ### Getting complicated here...
  loop_over_this = hists.keys()
  to_be_removed = set()#utils.wjets_removal
  for key in loop_over_this:
    for merging_key, merging_list in merging_scheme.iteritems():
      if key in merging_list:
        to_be_removed.add( key )
        if merging_key in hists:
          hists[ merging_key ].Add( hists[ key ] ) 
        else:
          hists[ merging_key ] = hists[ key ].Clone( 'another_clone_' + hists[ key ].GetName() )
  to_be_used = []
  for k in hists:
    hist = hists[ k ]
    hist.SetFillColor( utils.get_colors( k ) ) 
    hist.SetLineColor( ROOT.kBlack ) 
    hist.SetLineWidth( 1 ) 
    hist.SetTitle( utils.titles[ k ] ) 
    if not k in to_be_removed:
      penalty = 0
      if k == 'Wjets':
        penalty = -1
      to_be_used.append( ( k, hist.Integral() + penalty ) )
  sample_list = sorted( to_be_used,  key = lambda x: x[ 1 ] )
  sample_list_r = sorted( to_be_used,  key = lambda x: x[ 1 ], reverse = True )
  new_stack = ROOT.THStack( stack.GetName() + '_clone', '' )
  for name, integral in sample_list:
    new_stack.Add( hists[ name ] )
  data = utils.re_style_top( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'mT_' + cat + '_Nominal' ), True ) )
  utils.print_contents( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'mT_' + cat + '_Nominal' ) )
  error = utils.re_style_top( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad1' ).GetPrimitive( 'h0' ) ) )
  error.SetMarkerStyle( 1 )
  error.SetFillColor( ROOT.kBlack )
  error.SetFillStyle( 3345 )
  ###
  ratio_axis = utils.re_style_bot( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h3' ), minY, maxY ) 
  ratio = utils.re_style_bot( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h3' ), True ), minY, maxY ) 
  ratio_axis.GetXaxis().SetTitle( '#it{m}_{T}^{' + utils.zz + '} [GeV]' ) 
  ratio_axis.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
  syst_band = utils.re_style_bot( utils.th1_to_tgraph( root_file.Get( 'c1' ).GetPrimitive( 'pad2' ).GetPrimitive( 'h0' ) ) )
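  # Left-over debug loop below: it reads back the band points but never uses them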
  for index in range( syst_band.GetN() ):
    X, Y = ROOT.Double( 0. ), ROOT.Double( 0. )
    syst_band.GetPoint( index, X, Y )
  syst_band.SetMarkerStyle( 1 )
  syst_band.SetFillColor( ROOT.kBlack )
  syst_band.SetFillStyle( 3345 )
  syst_band.GetXaxis().SetTitle( '#it{m}_{T}^{' + utils.zz + '} [GeV]' ) 
  syst_band.GetYaxis().SetTitle( '#frac{Data}{Prediction}' )
  ###
  c_top.cd()
  new_stack.SetMinimum( 0.002 ) 
  new_stack.Draw( 'hist' )
  new_stack = utils.re_style_top( new_stack ) 
  new_stack.Draw( 'hist' )
  new_stack.GetXaxis().SetTitle( '#it{m}_{T}^{' + utils.zz + '} [GeV]' )
  new_stack.GetYaxis().SetTitle( 'Events / %d GeV' % int( stack.GetXaxis().GetBinWidth( 1 ) ) )
  new_stack.SetMaximum( maxYtop )
  new_stack.GetXaxis().SetRangeUser( 250, 1500 )
  error.Draw( '2 same' )
  data.Draw( 'pe' )
  x_l1, y_l1, latex1 = utils.draw_latex( utils.lumi, True )
  latex1.DrawLatex( x_l1, y_l1 - 0.12, title + utils.void_char )
  latex1.DrawLatex( x_l1, y_l1 - 0.18, '' )
  leg = utils.create_legend( len( sample_list_r ), 0.3, 1 )
  leg.AddEntry( data, 'Data', 'pe' ) 
  for name, integral in sample_list_r: 
    leg.AddEntry( hists[ name ], hists[ name ].GetTitle(), 'f' ) 
  leg.AddEntry( error, 'Uncertainty', 'f' ) 
  leg.Draw()
  mod_x, mod_y = 0, 0
  if cat == 'ee':
    mod_x, mod_y = 4, -2
  #utils.patch_bar( ( 228. + mod_x ) / 566., ( 233. + mod_x ) / 566., ( 324. + mod_y ) / 407., ( 324. + mod_y ) / 407., True ) 
  c_top.SetLogy()
  c_bottom.cd()
  ratio_axis.Draw( 'axis' )
  ratio_axis.GetXaxis().SetRangeUser( 250, 1500 )
  syst_band.Draw( 'e2 same' )
  ratio.Draw( 'pe0 same' )
  c_bottom.Update()
  for arrow in utils.get_arrows( ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(), ratio ):
    arrow.Draw()
  Line = ROOT.TLine( ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1 )
  Line.SetLineColor( ROOT.kBlack )
  Line.SetLineWidth( 2 )
  Line.SetLineStyle( 2 )
  Line.Draw()
  utils.save( canvas )
Example #6
def make_plot(file_name,
              cat,
              title,
              minX=-999.,
              maxX=-1.,
              minY=0.02,
              maxY=1.98,
              isLog=1):
    print "isLog:", isLog
    ###
    canvas, c_top, c_bottom = utils.create_double_pad(
        'plot_%s' % file_name.replace('.root', ''))
    ###
    root_file = ROOT.TFile('input_offshell/' + file_name)
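    # The file name is assumed to follow 'prefix_<channel>_<var1>_<var2>_....root';
    # e.g. a hypothetical 'hist_ee_mT_ZZ_SR.root' gives ch_file = 'ee' and
    # val_file = 'mT_ZZ', which later select the x-axis title and histogram names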
    ch_file = file_name.split('_')[1]
    val_file = file_name.split('_')[2] + '_' + file_name.split('_')[3]
    stack = utils.re_style_top(
        root_file.Get('c1').GetPrimitive('pad1').GetPrimitive('hs1'))
    hists = {}
    merging_scheme = {}  #utils.complex_merging
    for hist in stack.GetHists():
        prev_color = hist.GetFillColor()
        #print hist, prev_color
        sample_name = None
        for key, vals in utils.original_colors_2l2v.iteritems():
            #print key, vals
            if prev_color in vals:
                sample_name = key
        '''
    if not sample_name:
      'Sample Name Not Found'
    else:
      hist.SetFillColor( utils.get_colors( sample_name ) ) 
      #hist.SetLineColor( utils.colors[ sample_name ] ) 
      hist.SetLineColor( ROOT.kBlack ) 
      hist.SetLineWidth( 1 ) 
      hist.SetTitle( utils.titles[ sample_name ] ) 
    '''
        hists[sample_name] = hist
    ### get those no stack histograms in pad1
    data, error, sbi, sbi5 = None, None, None, None
    for element in root_file.Get('c1').GetPrimitive('pad1').GetListOfPrimitives():
        #print element
        element_name = element.GetName()
        if element_name == 'stat_sys_band_' + val_file + '_BJETVETO_' + ch_file:
            error = utils.re_style_top(element)
            error.SetMarkerStyle(1)
            error.SetFillColor(ROOT.kBlack)
            error.SetFillStyle(3345)
        elif element_name == 'data_' + val_file + '_BJETVETO_' + ch_file + '_Nominal':
            data = utils.re_style_top(element)
        elif element_name == 'SBI5_' + val_file + '_NLOIBJETVETO_' + ch_file + '_Nominal':
            sbi5 = utils.re_style_top(element)
            sbi5.SetMarkerColor(0)
            sbi5.SetMarkerStyle(0)
            sbi5.SetMarkerSize(0)
            sbi5.SetLineColor(616)
            sbi5.SetLineStyle(2)
        elif element_name == 'SBI_' + val_file + '_NLOIBJETVETO_' + ch_file + '_Nominal':
            sbi = element
            prev_color = sbi.GetFillColor()
            #print sbi, prev_color
            sample_name = None
            for key, vals in utils.original_colors_2l2v.iteritems():
                if prev_color in vals:
                    sample_name = key
            for k in hists:
                hist = hists[k]
                sbi.Add(hist, -1)
            hists[sample_name] = sbi

    ### Getting complicated here...
    loop_over_this = hists.keys()
    to_be_removed = set()
    for key in loop_over_this:
        for merging_key, merging_list in merging_scheme.iteritems():
            if key in merging_list:
                to_be_removed.add(key)
                if merging_key in hists:
                    hists[merging_key].Add(hists[key])
                else:
                    hists[merging_key] = hists[key].Clone('another_clone_' +
                                                          hists[key].GetName())
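    # Hard-coded (name, ordering-key) pairs: the second element only sets the
    # stack/legend order, replacing the integral-based ordering commented out below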
    to_be_used = [('ZZ', 3), ('Others', 1), ('WZ', 2), ('SBI', 4)]
    for k in hists:
        hist = hists[k]
        hist.SetFillColor(utils.get_colors(k))
        if k == 'SBI': hist.SetLineColor(ROOT.kBlue)
        else: hist.SetLineColor(ROOT.kBlack)
        hist.SetLineWidth(1)
        hist.SetTitle(utils.titles[k])
    #  if not k in to_be_removed:
    #    penalty = 0
    #    to_be_used.append( ( k, hist.Integral() + penalty ) )
    print "to_be_used:", to_be_used
    sample_list = sorted(to_be_used, key=lambda x: x[1])
    sample_list_r = sorted(to_be_used, key=lambda x: x[1], reverse=True)
    new_stack = ROOT.THStack(stack.GetName() + '_clone', '')
    for name, integral in sample_list:
        #print name, integral
        new_stack.Add(hists[name])
    ###
    ratio = utils.re_style_bot(
        root_file.Get('c1').GetPrimitive(
            'pad2').GetPrimitive('ratio_bkg_data_' + val_file + '_BJETVETO_' +
                                 ch_file + '_Nominal'))
    syst_band = utils.re_style_bot(
        root_file.Get('c1').GetPrimitive('pad2').GetPrimitive(
            'ratio_sys_band_' + val_file + '_BJETVETO_' + ch_file), minY, maxY)
    syst_band.SetMarkerStyle(1)
    syst_band.SetFillColor(ROOT.kBlack)
    syst_band.SetFillStyle(3345)
    if val_file == 'met_tst': syst_band.GetXaxis().SetTitle(utils.met)
    elif val_file == 'mT_ZZ': syst_band.GetXaxis().SetTitle(utils.mTZZ)
    syst_band.GetYaxis().SetTitle('#frac{Data}{Prediction}')
    ###
    if minX == -999. or maxX == -1.:
        minX, maxX = ROOT.gPad.GetUxmin(), ROOT.gPad.GetUxmax()
    print "minX =", minX, "maxX =", maxX
    c_top.cd()
    new_stack.SetMinimum(0.003)
    new_stack.Draw('hist')
    new_stack = utils.re_style_top(new_stack)
    new_stack.Draw('hist')
    new_stack.GetXaxis().SetRangeUser(minX, maxX)
    #new_stack.GetXaxis().SetTitle( utils.m4l )
    new_stack.GetYaxis().SetTitle('Events / %d GeV' %
                                  stack.GetXaxis().GetBinWidth(1))
    #new_stack.GetYaxis().SetTitle( 'Events / %s' % stack.GetXaxis().GetBinWidth( 1 ) )
    if isLog == 1: Yscale = 1000
    else: Yscale = 1.6
    new_stack.SetMaximum(data.GetMaximum() * Yscale)
    error.Draw('e2 same')
    sbi5.Draw('hist same')
    data.Draw('p same')
    x_l1, y_l1, latex1 = utils.draw_latex(utils.lumi, True)
    latex1.DrawLatex(x_l1, y_l1 - 0.12, title + utils.void_char)
    latex1.DrawLatex(x_l1, y_l1 - 0.18, cat + utils.void_char)
    leg = utils.create_legend(len(sample_list_r), 0.3, 1)
    leg.AddEntry(data, 'Data', 'pe')
    leg.AddEntry(sbi5, 'gg+VBF#rightarrow(H*#rightarrow)ZZ(#mu_{off-shell}=5)',
                 'l')
    for name, integral in sample_list_r:
        leg.AddEntry(hists[name], hists[name].GetTitle(), 'f')
    leg.AddEntry(error, 'Uncertainty', 'f')
    #leg.AddEntry( None, '', '' )
    #leg.AddEntry( None, '#color[0]{' + utils.titles[ 'emu' ] + '}', '' )
    leg.Draw()
    c_top.Update()
    if isLog == 1: c_top.SetLogy()
    c_bottom.cd()
    syst_band.Draw('e2')
    syst_band.GetXaxis().SetRangeUser(minX, maxX)
    ratio.Draw('pe0 same')
    c_bottom.Update()
    #for arrow in utils.get_arrows( ROOT.gPad.GetUymin(), ROOT.gPad.GetUymax(), ratio ): arrow.Draw()
    Line = ROOT.TLine(ROOT.gPad.GetUxmin(), 1, ROOT.gPad.GetUxmax(), 1)
    Line.SetLineColor(ROOT.kBlack)
    Line.SetLineWidth(2)
    Line.SetLineStyle(2)
    Line.Draw()
    print "isLog:", isLog
    utils.save(canvas, isLog)
Example #7
def create_covid_viz():
    '''
    Load and pre-process the geojson file
    '''
    world_geojson = gpd.read_file(geojson_path)
    world_geojson.drop(columns=['ISO_A2'], inplace=True)
    '''
    Load and pre-process the COVID-19 data
    '''
    # Load the COVID-19 data
    df_covid = pd.read_csv(
        os.path.join(data_dir_path, 'covid_' + newest_dataset))
    timestamp = df_covid['Last_Update'][0]

    # Replace some country names so that they match the geojson country names
    country_name_fixes = {
        'US': 'United States of America',
        'Bahamas': 'The Bahamas',
        'Congo (Brazzaville)': 'Republic of Congo',
        'Congo (Kinshasa)': 'Democratic Republic of the Congo',
        'Taiwan*': 'Taiwan',
        "Cote d'Ivoire": 'Ivory Coast',
        'Czechia': 'Czech Republic'
    }
    df_covid.replace({'Country_Region': country_name_fixes}, inplace=True)
    world_geojson.replace(to_replace={'ADMIN': 'Macedonia'},
                          value='North Macedonia',
                          inplace=True)

    # Change the name of 'ADMIN' column in the geojson DF to match the one in COVID DF
    world_geojson.rename(columns={'ADMIN': 'Country_Region'}, inplace=True)

    # Aggregate the data for countries that have regional information
    df_covid_agg = df_covid.groupby('Country_Region').agg({
        'Confirmed': 'sum',
        'Deaths': 'sum',
        'Recovered': 'sum',
        'Active': 'sum',
        'Incident_Rate': 'mean',
        'Case_Fatality_Ratio': 'mean'
    })
    world_geojson = world_geojson.sort_values('Country_Region').reset_index(
        drop=True)

    # Join the geojson with the DataFrame
    df_covid_joined = df_covid_agg.merge(world_geojson,
                                         how='right',
                                         on='Country_Region')

    # Count min and max values for specific columns
    min_dict, max_dict = {}, {}
    column_names = [
        'Confirmed', 'Deaths', 'Active', 'Incident_Rate', 'Case_Fatality_Ratio'
    ]
    for name in column_names:
        min_dict[name] = min(df_covid_joined[name])
        max_dict[name] = max(df_covid_joined[name])

    # Replace NaNs in the DataFrame with '-1'
    df_covid_joined.fillna(-1, inplace=True)

    # Add the data columns to geo json for future popup displaying
    world_geojson = world_geojson.assign(
        Confirmed=df_covid_joined['Confirmed'],
        Deaths=df_covid_joined['Deaths'],
        Active=df_covid_joined['Active'],
        Incident_Rate=df_covid_joined['Incident_Rate'],
        Case_Fatality_Ratio=df_covid_joined['Case_Fatality_Ratio'])
    print(world_geojson)

    # Set the correct index columns
    df_covid_joined.set_index('Country_Region', inplace=True)

    # Create lists of geometrically spaced attribute values over the computed
    # min-max intervals and assign corresponding colors to the DataFrame
    colormap_dict = {}
    bins = []
    for name in column_names:
        # Work-around for geometric space not accepting zeros in the sequence
        tmp_min = min_dict[name]
        if min_dict[name] < 1:
            min_dict[name] = 1

        inner_bins = np.geomspace(start=min_dict[name],
                                  stop=max_dict[name],
                                  num=10)
        min_dict[name] = tmp_min
        inner_bins = np.delete(inner_bins, 0)
        inner_bins = np.insert(inner_bins, 0, min_dict[name])
        inner_bins = np.insert(inner_bins, 0, -1.)
        inner_bins = inner_bins.tolist()

        # Round the inner_bins values before appending to the bins list
        if name in ['Confirmed', 'Deaths', 'Active']:
            inner_bins = [int(round(bin, 0)) for bin in inner_bins]
        else:
            inner_bins = [round(bin, 2) for bin in inner_bins]

        bins.append(inner_bins)
        colormap_dict[name] = cm.StepColormap(colors=color_dict[name],
                                              index=inner_bins,
                                              vmin=min_dict[name],
                                              vmax=max_dict[name])
        df_covid_joined[name + '_color'] = df_covid_joined[name].map(
            lambda x: colormap_dict[name].rgb_hex_str(x))
    ''' 
    Initialize the map
    '''
    map_covid = folium.Map(location=[0, 0],
                           zoom_start=4,
                           max_bounds=True,
                           tiles=None)
    base_map = folium.FeatureGroup(name='Basemap', overlay=True, control=False)
    folium.TileLayer(min_zoom=3, tiles='OpenStreetMap').add_to(base_map)
    base_map.add_to(map_covid)
    '''
    Create the content of the map
    '''
    # Create FeatureGroups to group the data
    feature_groups = []
    for category, _ in color_dict.items():
        group = folium.FeatureGroup(category, overlay=False)
        feature_groups.append(group)

    # Create the choropleths
    choropleth_confirmed = folium.GeoJson(
        data=world_geojson,
        zoom_on_click=False,
        name='Confirmed Cases',
        style_function=lambda x: {
            'fillColor': df_covid_joined['Confirmed_color'][x['properties']['Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 0.5
        }).add_to(feature_groups[0])
    popup_confirmed = folium.GeoJsonPopup(
        fields=['Country_Region', 'Confirmed'], labels=False)
    popup_confirmed.add_to(choropleth_confirmed)

    choropleth_deaths = folium.GeoJson(
        data=world_geojson,
        name='Deaths',
        style_function=lambda x: {
            'fillColor': df_covid_joined['Deaths_color'][x['properties']['Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[1])
    popup_deaths = folium.GeoJsonPopup(fields=['Country_Region', 'Deaths'],
                                       labels=False)
    popup_deaths.add_to(choropleth_deaths)

    choropleth_active = folium.GeoJson(
        data=world_geojson,
        name='Active Cases',
        style_function=lambda x: {
            'fillColor': df_covid_joined['Active_color'][x['properties']['Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[2])
    popup_active = folium.GeoJsonPopup(fields=['Country_Region', 'Active'],
                                       labels=False)
    popup_active.add_to(choropleth_active)

    choropleth_incident_rate = folium.GeoJson(
        data=world_geojson,
        name='Incident Rate',
        style_function=lambda x: {
            'fillColor': df_covid_joined['Incident_Rate_color'][x['properties']['Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[3])
    popup_incident_rate = folium.GeoJsonPopup(
        fields=['Country_Region', 'Incident_Rate'], labels=False)
    popup_incident_rate.add_to(choropleth_incident_rate)

    choropleth_case_fatality_ratio = folium.GeoJson(
        data=world_geojson,
        name='Case Fatality Ratio',
        style_function=lambda x: {
            'fillColor': df_covid_joined['Case_Fatality_Ratio_color'][x['properties']['Country_Region']],
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 1
        }).add_to(feature_groups[4])
    popup_case_fatality_ratio = folium.GeoJsonPopup(
        fields=['Country_Region', 'Case_Fatality_Ratio'], labels=False)
    popup_case_fatality_ratio.add_to(choropleth_case_fatality_ratio)

    # Build the legend label dictionaries, one per data category
    legend_str_dict = {}
    for i, (k, v) in enumerate(color_dict.items()):
        legend_labels_dict = {}
        for j, color in enumerate(v):
            if j == 0:
                legend_labels_dict[color] = 'No data'
            elif j == len(v) - 1:
                legend_labels_dict[color] = '> ' + str(bins[i][j])
            else:
                legend_labels_dict[color] = str(bins[i][j]) + ' - ' + str(
                    bins[i][j + 1])
        legend_str_dict[k] = legend_labels_dict

    template = utils.create_legend(caption='COVID-19 status as of: ' +
                                   str(timestamp) + ' UTC',
                                   legend_labels=legend_str_dict)
    macro = MacroElement()
    macro._template = Template(template)
    map_covid.get_root().add_child(macro)

    for feature_group in feature_groups:
        feature_group.add_to(map_covid)

    # Activate Layer Control
    folium.LayerControl(collapsed=True).add_to(map_covid)
    '''
    Save completed map viz to an appropriate folder
    '''
    map_covid.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'COVID-19_viz.html'))
    print('Successfully created the COVID-19 viz!')
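
# Besides the imports sketched after Example #1, this snippet assumes a few more
# module-level names. Their real values are not part of the snippet, so the ones
# below are placeholders only (the paths are assumptions based on Example #1):
import branca.colormap as cm  # used above as cm.StepColormap

data_dir_path = os.path.normpath(os.path.join(script_dir_path, '..', 'data'))
geojson_path = os.path.join(data_dir_path, 'borders_geo.json')  # presumably the same file as in Example #1
newest_dataset = '...'  # file-name suffix of the newest downloaded 'covid_*.csv' (unknown here)
color_dict = {
    # one hex-colour list per category (colours omitted here); the key order
    # must match the 'column_names' list used above when bins and legends are built
    'Confirmed': [],
    'Deaths': [],
    'Active': [],
    'Incident_Rate': [],
    'Case_Fatality_Ratio': []
}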
Example #8
def create_uk_accidents_viz():
    '''
    Load and pre-process the UK accidents data
    '''
    # Load the accidents data
    df_accidents_path = os.path.normpath(
        os.path.join(script_dir_path, '..', 'data', 'Accidents1115.csv'))
    fields = [
        'Accident_Index', 'Latitude', 'Longitude', 'Date', 'Accident_Severity'
    ]
    df_accidents = pd.read_csv(df_accidents_path,
                               index_col='Accident_Index',
                               usecols=fields)

    # Format and sort by date
    df_accidents['Date'] = pd.to_datetime(df_accidents['Date'],
                                          format='%Y-%m-%d',
                                          errors='raise')
    df_accidents.sort_values('Date', inplace=True)

    # Drop the rows in which there's no lat lon data
    df_accidents = df_accidents[df_accidents['Latitude'].notna()]

    # Note: this writes the cleaned, reduced DataFrame back over the source CSV
    df_accidents.to_csv(df_accidents_path)

    # Leave only the 2015 accidents
    df_accidents = df_accidents[df_accidents['Date'].dt.year == 2015]

    # Get the heatmap index values
    heatmap_time_dates = df_accidents['Date'].dt.strftime(
        '%Y-%m-%d %A').unique().tolist()

    # Get the heatmap data
    heatmap_time_data = []
    for date in heatmap_time_dates:
        df_accidents_daily = df_accidents.loc[df_accidents['Date'] == date]
        heatmap_time_data.append(df_accidents_daily[['Latitude', 'Longitude'
                                                     ]].to_numpy().tolist())
    '''    
    Initialize the map
    '''
    map_accidents = folium.Map(location=[54, -2.4220],
                               zoom_start=6,
                               max_bounds=True,
                               min_zoom=3,
                               max_lat=60,
                               max_lon=5,
                               min_lat=49,
                               min_lon=-12)
    '''
    Create the map content and add it to the map object
    '''
    # Create the HeatMapWithTime
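    # The gradient keys are fractions of the normalised point intensity at which
    # each colour starts, so only the densest spots shade towards orange/red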
    heatmap = HeatMapWithTime(heatmap_time_data,
                              index=heatmap_time_dates,
                              name='Traffic accidents in Great Britain (2015)',
                              gradient={
                                  .8: 'blue',
                                  .95: 'lime',
                                  .998: 'orange',
                                  1: 'red'
                              },
                              use_local_extrema=False,
                              min_opacity=0,
                              max_opacity=0.7,
                              scale_radius=False)
    heatmap.add_to(map_accidents)

    # Create the legend
    template = utils.create_legend(caption='UK traffic accidents in 2015')
    macro = MacroElement()
    macro._template = Template(template)
    map_accidents.get_root().add_child(macro)
    '''
    Save completed map viz to an appropriate folder
    '''
    map_accidents.save(
        os.path.join(script_dir_path, '..', 'webapp', 'templates',
                     'UK_accidents_viz.html'))
    print('Successfully created the UK accidents viz!')