Пример #1
0
def report():
    """Render the movie-rating distribution page.

    Queries the ``movie_ratings`` Hive table, counts rows per rating and
    embeds the resulting Bokeh bar chart in ``/main/embed.html``.

    Returns:
        The rendered connection-issue page when no cursor is available or
        the query fails, the no-records page when the query returns zero
        rows, otherwise the UTF-8 encoded chart page.
    """
    cursor = get_hive_cursor()

    if cursor is None:
        return render_template('/main/bi_connection_issue.html')

    # FIXME we probably want to create aggregates on hadoop
    #       and cache them rather than returning the whole data
    #       set here

    # Monitoring pings are stored with customer_id = -1 and movie_id = -1;
    # the query filters them out.
    try:
        cursor.execute(
            "select * from movie_ratings where customer_id <> '-1' and movie_id <> '-1'",
            configuration={
                'hive.mapred.supports.subdirectories': 'true',
                'mapred.input.dir.recursive': 'true',
            })
    except Exception:
        # Any query failure is shown to the user as a connection problem.
        # ``Exception`` (rather than a bare ``except``) lets SystemExit and
        # KeyboardInterrupt propagate.
        return render_template('/main/bi_connection_issue.html')

    df = as_pandas(cursor)

    if df.shape[0] == 0:
        return render_template('/main/bi_no_records.html')

    # Imported lazily, as in the original; only ``Bar`` is needed here.
    from bokeh.charts import Bar

    fig = Bar(
        df,
        label='movie_ratings.rating',
        values='movie_ratings.rating',
        agg='count',
        title='Distribution of movie ratings',
        legend=False,
    )

    fig.plot_height = 400
    fig.xaxis.axis_label = 'Rating'
    fig.yaxis.axis_label = 'Count ( Rating )'

    js_resources = INLINE.render_js()
    css_resources = INLINE.render_css()

    script, div = components(fig)
    html = flask.render_template(
        '/main/embed.html',
        plot_script=script,
        plot_div=div,
        js_resources=js_resources,
        css_resources=css_resources,
    )
    return encode_utf8(html)
def output_chart(issues_df, output_mode='static'):
    """Build the stacked issues bar chart and send it to the chosen output.

    Args:
        issues_df: Data passed to ``Bar``; the chart reads its
            ``value_delivered`` and ``status`` columns.
        output_mode: ``'static'`` saves an HTML file to ``ISSUES_FILE``,
            ``'notebook'`` shows the chart inline, and any other value
            publishes it through a Bokeh server session.
    """
    import datetime
    import bokeh
    from bokeh.models import HoverTool

    # The title carries the date on which the chart was generated.
    chart_title = (ISSUES_TITLE + " (Updated " +
                   datetime.datetime.now().strftime('%m/%d/%Y') + ")")

    issues_chart = Bar(
        issues_df,
        label='value_delivered',
        values='status',
        agg='count',
        stack='status',
        title=chart_title,
        xlabel="Value Delivered",
        ylabel="Number of Use Cases",
        legend='top_right',
        tools='hover',
        color=brewer["GnBu"][3],
    )

    # Size the chart to the destination frame minus the body margin on
    # both sides.
    total_margin = HTML_BODY_MARGIN * 2
    issues_chart.plot_width = DESTINATION_FRAME_WIDTH - total_margin
    issues_chart.plot_height = DESTINATION_FRAME_HEIGHT - total_margin
    issues_chart.logo = None
    issues_chart.toolbar_location = None

    issues_chart.select({'type': HoverTool}).tooltips = [
        ("Value Delivered", "$x"),
    ]

    # --- Configure output ---
    reset_output()

    if output_mode == 'static':
        # Static file.  CDN is most space efficient
        output_file(
            ISSUES_FILE,
            title=ISSUES_TITLE,
            autosave=False,
            mode='cdn',
            root_dir=None,
        )
        save(issues_chart, filename=ISSUES_FILE)
        return

    if output_mode == 'notebook':
        output_notebook()  # Show inline
    else:
        # Server (using internal server IP, rather than localhost or external)
        server_session = bokeh.session.Session(
            root_url=BOKEH_SERVER_IP, load_from_config=False)
        output_server("ddod_chart", session=server_session)

    show(issues_chart)
Пример #3
0
def plot_average_dts(df, hours):
    """Build a bar chart of transfer counts per roughly logarithmic time bin.

    The bin upper edges are 1..9 (step 1), 10..90 (step 10) and 100..900
    (step 100). A value ``t`` from ``df['noao_time']`` is counted in the bin
    with upper edge ``e`` when ``previous_edge < t <= e``; the first bin is
    ``(0, 1]``. Values ``<= 0`` or ``> 900`` (and NaN) are not counted.

    The previous implementation used strict comparisons on both sides, so
    any value lying exactly on a bin edge was silently dropped.

    Args:
        df: Object indexable by ``'noao_time'`` yielding numeric times
            (the chart labels them as minutes).
        hours: Unused; kept so existing callers keep working.

    Returns:
        The ``Bar`` chart, sized 1000x500.
    """
    from bisect import bisect_left

    ### Currently using a fixed set of bin edges; a generated log scale could replace this. ###
    edges = (list(range(1, 10)) +
             list(range(10, 100, 10)) +
             list(range(100, 1000, 100)))
    counts = [0] * len(edges)

    for item in df['noao_time']:
        # ``not item > 0`` also rejects NaN, which compares false to everything.
        if not item > 0:
            continue
        # First edge that is >= item, i.e. edges[idx - 1] < item <= edges[idx].
        idx = bisect_left(edges, item)
        if idx < len(edges):
            counts[idx] += 1

    graph_info = {
        'values': counts,
        'Time in minutes': edges,
    }
    p = Bar(graph_info, values='values', label='Time in minutes',
            ylabel='Number of transfers', color='navy', title='DTS time plot')

    p.plot_height = 500
    p.plot_width = 1000

    return p
def output_chart(issues_df, output_mode='static'):
    """Render the issues bar chart to a file, a notebook or a Bokeh server.

    Args:
        issues_df: Data handed to ``Bar``; bars are labelled by
            ``value_delivered`` and stacked/counted by ``status``.
        output_mode: ``'static'`` writes ``ISSUES_FILE``; ``'notebook'``
            displays inline; anything else pushes to the Bokeh server at
            ``BOKEH_SERVER_IP``.
    """
    import datetime
    import bokeh
    from bokeh.models import HoverTool

    # Chart options, including a title stamped with today's date.
    bar_options = dict(
        label='value_delivered',
        values='status',
        agg='count',
        stack='status',
        title=ISSUES_TITLE + " (Updated " + datetime.datetime.now().strftime('%m/%d/%Y') + ")",
        xlabel="Value Delivered",
        ylabel="Number of Use Cases",
        legend='top_right',
        tools='hover',
        color=brewer["GnBu"][3],
    )
    issues_chart = Bar(issues_df, **bar_options)

    # Leave room for the HTML body margin on each side of the frame.
    issues_chart.plot_width = DESTINATION_FRAME_WIDTH - (HTML_BODY_MARGIN * 2)
    issues_chart.plot_height = DESTINATION_FRAME_HEIGHT - (HTML_BODY_MARGIN * 2)
    issues_chart.logo = None
    issues_chart.toolbar_location = None

    hover_tools = issues_chart.select({'type': HoverTool})
    hover_tools.tooltips = [("Value Delivered", "$x")]

    # --- Configure output ---
    reset_output()

    if output_mode == 'static':
        # Static file.  CDN is most space efficient
        output_file(ISSUES_FILE, title=ISSUES_TITLE, autosave=False,
                    mode='cdn', root_dir=None)
        save(issues_chart, filename=ISSUES_FILE)
        return

    if output_mode != 'notebook':
        # Server (using internal server IP, rather than localhost or external)
        session = bokeh.session.Session(root_url=BOKEH_SERVER_IP,
                                        load_from_config=False)
        output_server("ddod_chart", session=session)
    else:
        output_notebook()  # Show inline

    show(issues_chart)
Пример #5
0
def generate_single_bar(df, time_str, color_dict, colors):
    """Build a styled bar chart of failure rate per drive model.

    Args:
        df: DataFrame with ``percent_total``, ``color``, ``model``,
            ``failure_rate`` and ``count`` columns. Only rows with
            ``percent_total >= 3`` are plotted.
        time_str: Text appended to the chart title.
        color_dict: Unused; bar colours come from ``df['color']``. Kept so
            existing callers keep working.
        colors: Unused; kept so existing callers keep working.

    Returns:
        The configured ``Bar`` chart.
    """
    df = df[df.percent_total >= 3]
    title = "Failure Rate " + time_str

    # Data source backing the hover tooltips (@model, @count).
    source = ColumnDataSource(dict(
        color=list(df['color']),
        model=list(df['model']),
        failure_rate=list(df['failure_rate']),
        count=list(df['count']),
    ))

    plot = Bar(df, 'model', values='failure_rate', title=title,
               source=source, tools=['hover'], color='color', legend=None)

    hover = plot.select(dict(type=HoverTool))
    hover.tooltips = [
        ("Model ", "@model"),
        ("Failure rate ", "@y"),
        ("Number of drives", "@count"),
    ]
    hover.mode = 'mouse'

    plot.xaxis.axis_label = 'Model Serial Number'
    plot.yaxis.axis_label = 'Naive Failure Rate'
    # NOTE(review): the title font size is set twice through two different
    # attributes (here and ``plot.title.text_font_size`` below); confirm
    # which one the targeted Bokeh version honours and drop the other.
    plot.title_text_font_size = "18px"
    plot.grid.grid_line_alpha = 0
    plot.ygrid.grid_line_color = None
    plot.toolbar.logo = None
    plot.outline_line_width = 0
    plot.outline_line_color = "white"
    plot.plot_height = 600
    plot.plot_width = 800
    plot.xaxis.major_tick_line_color = None
    plot.yaxis.major_tick_line_color = None
    plot.xaxis.axis_line_width = 2
    plot.yaxis.axis_line_width = 2
    plot.title.text_font_size = '16pt'
    plot.xaxis.axis_label_text_font_size = "14pt"
    plot.xaxis.major_label_text_font_size = "14pt"
    plot.yaxis.axis_label_text_font_size = "14pt"
    plot.yaxis.major_label_text_font_size = "14pt"
    return plot
Пример #6
0
def summarize_plans(state, age, npi):
    """Summarise current plan rates for a state and age, optionally per provider.

    Reads the currently effective plan rates for ``state`` and ``age`` from
    ``webapp/data/plan-rates.h5``, charts the mean ``IndividualRate`` per
    ``MetalLevel`` and, when ``npi`` is given, compares the provider's plans
    against the state figures.

    Args:
        state: HDF5 key used for both data files.
        age: Age as a number or numeric string; clamped to the range 20-65.
        npi: Provider identifier, or ``''`` for the state-only summary.

    Returns:
        dict with keys ``num_plans``, ``script``, ``plot_div``,
        ``national_comp``, ``state_comp`` and ``plans`` (rendered HTML).
    """
    # NOTE: the ``where`` expressions passed to pd.read_hdf below refer to
    # the names ``age`` and ``todaysdate``; both must stay local variables
    # of this function and must not be renamed.
    todaysdate = str(datetime.now())
    age = str(age)
    try:
        # Clamp numerically: comparing the strings directly ranks '7' above
        # '65' and '100' below '20'.
        age = str(min(max(int(age), 20), 65))
    except ValueError:
        # Non-numeric input keeps the previous lexicographic clamp.
        if age > '65':
            age = '65'
        elif age < '20':
            age = '20'

    # filter based on what plans are current (not expired) and age
    filteredplans = pd.read_hdf(
        'webapp/data/plan-rates.h5',
        state,
        where=[
            '(Age==age) & (RateExpirationDate > todaysdate) & (RateEffectiveDate < todaysdate)'
        ],
        columns=[
            'IndividualRate', 'MetalLevel', 'Age',
            'URLForSummaryofBenefitsCoverage', 'PlanMarketingName'
        ])
    stateave = filteredplans.IndividualRate.mean()
    myave = stateave
    statebardf = filteredplans.groupby('MetalLevel', as_index=False).mean()

    if npi == '':
        p = Bar(filteredplans.groupby('MetalLevel').IndividualRate.mean(),
                values='IndividualRate',
                xlabel="",
                ylabel="Montly Premium ($)")
    else:
        print('input npi is ' + npi)
        # read hdf5 of provider-plan pairings
        provnplanpd = pd.read_hdf('webapp/data/plan_providers.h5', state)
        # make a list of planids for the input npi
        planlst = provnplanpd[provnplanpd.npi == npi].plan_id.values
        print(planlst)
        # read in hdf5 of plan info
        planinfo = pd.read_hdf(
            'webapp/data/plan-rates.h5',
            state,
            where=[
                '(Age==age) & (RateExpirationDate > todaysdate) & (RateEffectiveDate < todaysdate)'
            ])

        # For each plan id keep its first entry, then concatenate them all
        # in one call.
        per_plan = [
            planinfo[planinfo.PlanId == plan_id].groupby(
                'PlanId', as_index=False).first()
            for plan_id in planlst
        ]
        if per_plan:
            filteredplans = pd.concat(per_plan)
        else:
            # Provider has no plans: use an empty frame with the same
            # columns instead of raising IndexError on planlst[0].
            filteredplans = planinfo.iloc[0:0]

        provbardf = filteredplans.groupby('MetalLevel', as_index=False).mean()
        # Scalar assignment broadcasts to each frame's own length; sizing the
        # state column from the provider frame broke when they differed.
        provbardf['average'] = 'provider rates'
        statebardf['average'] = 'state average'
        # NOTE(review): this averages the state per-metal-level means, not
        # the provider's (provbardf) - confirm which one is intended.
        myave = statebardf.IndividualRate.mean()
        plotdf = pd.concat([provbardf, statebardf])
        print(plotdf)
        p = Bar(plotdf,
                label='MetalLevel',
                values='IndividualRate',
                group='average',
                legend='top_right',
                xlabel="",
                ylabel="Montly Premium ($)")

    p.logo = None
    p.plot_height = 400
    p.toolbar_location = None
    script, div = components(p)
    print(filteredplans.to_dict(orient='records'))
    return {
        'num_plans': len(filteredplans),
        'script': script,
        'plot_div': div,
        'national_comp':
            format_price_comp(float(age2nationalAverage(age)) - myave),
        'state_comp': format_price_comp(float(stateave) - myave),
        'plans': render_template(
            'plans.html',
            plans=filteredplans.sort_values(
                by='IndividualRate').to_dict(orient='records')),
    }
Пример #7
0
def summarize_plans(state, age, npi):
    """Build the plan-rate summary (chart plus plan list) for a state and age.

    Loads the plan rates that are currently in effect for ``state`` and
    ``age`` from ``webapp/data/plan-rates.h5`` and charts the mean
    ``IndividualRate`` per ``MetalLevel``. When ``npi`` is non-empty, the
    plans paired with that provider are charted next to the state figures.

    Args:
        state: HDF5 key used for both data files.
        age: Age as a number or numeric string; clamped to the range 20-65.
        npi: Provider identifier, or ``''`` for the state-only summary.

    Returns:
        dict with keys ``num_plans``, ``script``, ``plot_div``,
        ``national_comp``, ``state_comp`` and ``plans`` (rendered HTML).
    """
    # NOTE: ``age`` and ``todaysdate`` are referenced by name inside the
    # ``where`` strings given to pd.read_hdf, so they must remain local
    # variables with exactly these names.
    todaysdate = str(datetime.now())
    age = str(age)
    try:
        # Numeric clamp; plain string comparison orders '7' after '65' and
        # '100' before '20'.
        age = str(min(max(int(age), 20), 65))
    except ValueError:
        # Fall back to the previous lexicographic clamp for non-numeric input.
        if age > '65':
            age = '65'
        elif age < '20':
            age = '20'

    # filter based on what plans are current (not expired) and age
    filteredplans = pd.read_hdf(
        'webapp/data/plan-rates.h5', state,
        where=['(Age==age) & (RateExpirationDate > todaysdate) & (RateEffectiveDate < todaysdate)'],
        columns=['IndividualRate', 'MetalLevel', 'Age',
                 'URLForSummaryofBenefitsCoverage', 'PlanMarketingName'])
    stateave = filteredplans.IndividualRate.mean()
    myave = stateave
    statebardf = filteredplans.groupby('MetalLevel', as_index=False).mean()

    if npi == '':
        p = Bar(filteredplans.groupby('MetalLevel').IndividualRate.mean(),
                values='IndividualRate',
                xlabel="", ylabel="Montly Premium ($)")
    else:
        print('input npi is ' + npi)
        # read hdf5 of provider-plan pairings
        provnplanpd = pd.read_hdf('webapp/data/plan_providers.h5', state)
        # make a list of planids for the input npi
        planlst = provnplanpd[provnplanpd.npi == npi].plan_id.values
        print(planlst)
        # read in hdf5 of plan info
        planinfo = pd.read_hdf(
            'webapp/data/plan-rates.h5', state,
            where=['(Age==age) & (RateExpirationDate > todaysdate) & (RateEffectiveDate < todaysdate)'])

        # Take the first entry of every plan id and concatenate once, rather
        # than growing the frame pairwise inside a loop.
        first_rows = [
            planinfo[planinfo.PlanId == plan_id].groupby('PlanId', as_index=False).first()
            for plan_id in planlst
        ]
        if first_rows:
            filteredplans = pd.concat(first_rows)
        else:
            # No plans for this provider: an empty frame with the same
            # columns avoids the IndexError that planlst[0] used to raise.
            filteredplans = planinfo.iloc[0:0]

        provbardf = filteredplans.groupby('MetalLevel', as_index=False).mean()
        # Scalars broadcast to each frame's own row count; the state column
        # used to be sized from the provider frame and failed on a mismatch.
        provbardf['average'] = 'provider rates'
        statebardf['average'] = 'state average'
        # NOTE(review): computed from the state frame, not provbardf -
        # confirm that this is the intended "my average".
        myave = statebardf.IndividualRate.mean()
        plotdf = pd.concat([provbardf, statebardf])
        print(plotdf)
        p = Bar(plotdf, label='MetalLevel', values='IndividualRate',
                group='average', legend='top_right',
                xlabel="", ylabel="Montly Premium ($)")

    p.logo = None
    p.plot_height = 400
    p.toolbar_location = None
    script, div = components(p)
    print(filteredplans.to_dict(orient='records'))
    return {
        'num_plans': len(filteredplans),
        'script': script,
        'plot_div': div,
        'national_comp': format_price_comp(float(age2nationalAverage(age)) - myave),
        'state_comp': format_price_comp(float(stateave) - myave),
        'plans': render_template(
            'plans.html',
            plans=filteredplans.sort_values(by='IndividualRate').to_dict(orient='records')),
    }