Example #1
def zooming_ticker():
    """
    Create a composite ticker so that sensible axis values and tick intervals are used at all zoom levels.
    :return: A Bokeh composite ticker
    """
    return CompositeTicker(tickers=[
        AdaptiveTicker(base=10,
                       mantissas=list(range(1, 10)),
                       min_interval=1,
                       max_interval=1,
                       num_minor_ticks=1),
        AdaptiveTicker(base=10,
                       mantissas=list(range(1, 10)),
                       min_interval=2,
                       max_interval=2,
                       num_minor_ticks=2),
        AdaptiveTicker(base=10,
                       mantissas=list(range(1, 10)),
                       min_interval=3,
                       max_interval=3,
                       num_minor_ticks=3),
        AdaptiveTicker(base=10,
                       mantissas=list(range(1, 10)),
                       min_interval=4,
                       max_interval=4,
                       num_minor_ticks=4),
        AdaptiveTicker(base=10,
                       mantissas=list(range(1, 10)),
                       min_interval=5,
                       num_minor_ticks=5)
    ])
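A minimal usage sketch (not part of the original snippet): attach the returned composite ticker to an axis. The imports and the sample figure are assumptions for illustration.

from bokeh.models import AdaptiveTicker, CompositeTicker
from bokeh.plotting import figure, show

p = figure(plot_width=400, plot_height=300)
p.line([1, 500, 25000], [1, 2, 3])
# Use the composite ticker so tick spacing stays sensible while zooming.
p.xaxis.ticker = zooming_ticker()
show(p)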
Example #2
def plot_amp_qa(data, name, lower=None, upper=None, amp_keys=None, title=None, plot_height=80, plot_width=700, ymin=None, ymax=None):
    '''Creates gridplot of 3 camera separated amp plots
    Args:
        data: table of per_amp qadata
        name: metric being plotted (str)
    Options:
        lower: list of lower thresholds per camera from get_thresholds()
            format: [[lower_errB, lowerB], [lower_errR, lowerR], [lower_errZ, lowerZ]]
        upper: list of upper thresholds per camera from get_thresholds()
            format: [[upperB, upper_errB], [upperR, upper_errR], [upperZ, upper_errZ]]
        amp_keys: list of amps that have data
        title: title for plot, if different than name (str)
        plot_height, plot_width: height, width of graph in pixels
        ymin/ymax: lists of y axis ranges for B, R, Z plots, unless data exceeds these
    Output:
        Bokeh gridplot object'''
    
    if title is None:
        title = name

    labels = [(spec, amp) for spec in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] for amp in ['A', 'B', 'C', 'D']]

    figs = []
    for cam in ['B', 'R', 'Z']:
        if cam == 'B':
            fig = plot_amp_cam_qa(data, name, cam, labels, title, lower=lower, upper=upper, amp_keys=amp_keys, plot_height=plot_height+25, plot_width=plot_width, ymin=ymin[0], ymax=ymax[0])
        elif cam == 'R':
            fig = plot_amp_cam_qa(data, name, cam, labels, title, lower=lower, upper=upper, amp_keys=amp_keys, plot_height=plot_height, plot_width=plot_width, ymin=ymin[1], ymax=ymax[1])
        elif cam == 'Z':
            fig = plot_amp_cam_qa(data, name, cam, labels, title, lower=lower, upper=upper, amp_keys=amp_keys, plot_height=plot_height, plot_width=plot_width, ymin=ymin[2], ymax=ymax[2])

        fig.yaxis.ticker = AdaptiveTicker(base=10, desired_num_ticks=5,
                                          mantissas=np.arange(1, 5.5, 0.5),
                                          min_interval=1)
        if name == "BIAS":
            fig.yaxis.formatter = NumeralTickFormatter(format='e')
        else:
            fig.yaxis.formatter = NumeralTickFormatter(format='a')
        figs.append(fig)
    
    # x-axis labels for spectrograph 0-9 and amplifier A-D
    axis = bk.figure(x_range=FactorRange(*labels), toolbar_location=None,
                     plot_height=50, plot_width=plot_width,
                     y_axis_location=None)
    axis.line(x=labels, y=0, line_color=None)
    axis.grid.grid_line_color=None
    axis.outline_line_color=None

    fig = gridplot([[figs[0]], [figs[1]], [figs[2]], [axis]], toolbar_location='right')

    return fig
Example #3
def make_plot(source,bgvar):
       #Define a sequential multi-hue color palette.
       palette = bokeh.palettes.Plasma[7]
       #Reverse color order so that the darkest color corresponds to the highest values.
       palette = palette[::-1]
       #Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
       color_mapper = LinearColorMapper(palette = palette)

       hover = HoverTool(tooltips = [ ('Life Expectancy','@outp_life'),('Predicted Life Expectancy','@outp_outp'),(feb[str(bgvar)],('@'+str(bgvar)))])
       #Create color bar.
       ticker = AdaptiveTicker()

       color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 20, height = 500,
       border_line_color=None,location = (0,0), orientation = 'vertical',ticker=ticker)
       #Create figure object.
       p = figure(title = 'Life Expectancy', plot_height = 600 , plot_width = 450, toolbar_location = None, tools=[hover],x_range=(-10060000, -10035000), y_range=(4658000, 4685000), x_axis_type="mercator", y_axis_type="mercator")
       p.add_tile(tile_provider)
       p.xgrid.grid_line_color = None
       p.ygrid.grid_line_color = None
       #Add patch renderer to figure. 
       p.patches('xs','ys', source = source,fill_color = {'field' :str(bgvar), 'transform' : color_mapper}, line_color = 'black', line_width = 0.25, fill_alpha = 0.26)
       #Specify figure layout.
       p.add_layout(color_bar, 'right')
       
       return p
Example #4
def plot_fp_temp(data, source):
    tooltips = ([('cursor obsXY', '($x, $y)')]
                + [(col_name, '@'+col_name) for col_name in data.columns
                   if col_name not in ['line_color']])
    fp_temp = figure(title='Focal Plane Temperature',
                     tools='pan,wheel_zoom,reset,hover,save',
                     tooltips=tooltips,
                     aspect_scale=1, plot_width=950, plot_height=1000)
    fp_temp.xaxis.axis_label = 'obsX / mm'
    fp_temp.yaxis.axis_label = 'obsY / mm'
    fp_temp.hover.show_arrow = True
    # low = data['temp_color'].min(skipna=True)
    # high = data['temp_color'].max(skipna=True)
    # old high was 30 (warning limit)
    low, high = 15, 35  # colormap isn't auto-updated when new data come in
    # old palette Magma256
    color_mapper = LinearColorMapper(palette=linear_bmy_10_95_c78, low=low, high=high)
    fp_temp.circle(
        x='obs_x', y='obs_y', source=source, radius=5,
        fill_color={'field': 'temp_color', 'transform': color_mapper},
        fill_alpha=0.7, line_color='line_color', line_width=1.8,
        hover_line_color='black')
    colorbar = ColorBar(color_mapper=color_mapper,  # border_line_color=None,
                        ticker=AdaptiveTicker(), orientation='horizontal',
                        title='absolute device temperature / °C',
                        padding=5, location=(300, 0), height=15, width=250)
    fp_temp.add_layout(colorbar, place='above')  # above
    return fp_temp
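A hedged usage sketch for the function above, assuming a DataFrame that carries the columns the glyph and tooltips reference (obs_x, obs_y, temp_color, line_color); the sample values are made up for illustration.

import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.plotting import show

# Two fake devices; real data would come from the focal-plane telemetry.
data = pd.DataFrame({'obs_x': [0.0, 150.0],
                     'obs_y': [0.0, -80.0],
                     'temp_color': [20.5, 28.1],
                     'line_color': ['gray', 'gray']})
source = ColumnDataSource(data)
show(plot_fp_temp(data, source))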
Example #5
    def create_figure1():
        xsp1 = dfp1[x.value].values
        ysp1 = dfp1[y.value].values
        x_titlep1 = x.value.title()
        y_titlep1 = y.value.title()

        kwp1 = dict()
        if x.value in discrete:
            kwp1['x_range'] = sorted(set(xsp1))
        if y.value in discrete:
            kwp1['y_range'] = sorted(set(ysp1))
        kwp1['title'] = "%s vs %s" % (x_titlep1, y_titlep1) + " for {} on {} and {}".format(BotName, EURUSD, TimeFrame)

        pp1 = figure(plot_height=400, plot_width=800, tools='pan,box_zoom,hover,reset,lasso_select', **kwp1)
        pp1.xaxis.axis_label = x_titlep1
        pp1.yaxis.axis_label = y_titlep1

        if x.value in discrete:
            pp1.xaxis.major_label_orientation = pd.np.pi / 4

        sz = 9
        if size.value != 'None':
            if len(set(dfp2[size.value])) > N_SIZES:
                groups = pd.qcut(dfp2[size.value].values, N_SIZES, duplicates='drop')
            else:
                groups = pd.Categorical(dfp2[size.value])
            sz = [SIZES[xx] for xx in groups.codes]

        c = "#31AADE"
        if color.value != 'None':
            if len(set(dfp2[color.value])) > N_COLORS:
                groups = pd.qcut(dfp2[color.value].values, N_COLORS, duplicates='drop')
            else:
                groups = pd.Categorical(dfp2[color.value])
            c = [COLORS[xx] for xx in groups.codes]

        # COLOR BAR NEXT TO GRAPHIC

        #PAIR 1
        try:
            Var_color_mapper = LinearColorMapper(palette="Inferno256",low=min(dfp1['Profit']),high=max(dfp1['Profit']))  # TODO: fix the max and min so they pick up the actual values
        except ValueError:
            Var_color_mapper = LinearColorMapper(palette="Inferno256",low=0,high=1)
            print('This {} did not launch Phase {} on {}'.format(BotName,Phase,TimeFrame))

        #Var_color_mapper = LinearColorMapper(palette="Inferno256",low=min(dfp1[color.value]),high=max(dfp1[color.value]))  # TODO: fix the max and min so they pick up the actual values
        GraphTicker = AdaptiveTicker(base=50,desired_num_ticks=10,num_minor_ticks=20,max_interval=1000)
        Color_legend = ColorBar(color_mapper=Var_color_mapper,ticker=GraphTicker,label_standoff=12, border_line_color=None,location=(0, 0))  # TODO: fix LogTicker so it reflects the color scale
        pp1.circle(x=xsp1, y=ysp1, color=c, size=sz, line_color="white", alpha=0.6, hover_color='white', hover_alpha=0.5)
        pp1.add_layout(Color_legend,'right')

        return pp1
Example #6
class TimeTicker(CompositeTicker):
    """ Generate nice ticks across different time scales.
    """
    __implementation__ = 'time_ticker.coffee'

    num_minor_ticks = Override(default=4)
    tickers = Override(default=lambda: [
        AdaptiveTicker(mantissas=[1, 2, 5],
                       base=10,
                       min_interval=ONE_NANO,
                       max_interval=500 * ONE_MILLI,
                       num_minor_ticks=5),
        AdaptiveTicker(mantissas=[1, 2, 5, 10, 15, 20, 30],
                       base=60,
                       min_interval=ONE_SECOND,
                       max_interval=30 * ONE_MINUTE,
                       num_minor_ticks=4),
        AdaptiveTicker(mantissas=[1, 2, 4, 6, 8, 12],
                       base=24,
                       min_interval=ONE_HOUR,
                       max_interval=None,
                       num_minor_ticks=4)
    ])
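The interval constants referenced above are defined elsewhere in the module; a plausible sketch, assuming Bokeh's convention of milliseconds on datetime scales, is:

# Assumed interval constants (milliseconds); not shown in the original snippet.
ONE_NANO = 1e-6
ONE_MILLI = 1.0
ONE_SECOND = 1000.0
ONE_MINUTE = 60 * ONE_SECOND
ONE_HOUR = 60 * ONE_MINUTE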
Example #7
    def build_plot(self, permit_type: str, year: int):
        assert (permit_type == 'total' or permit_type in self.permit_types)
        assert (year in self.years)

        plot_info = self.build_data_source(year, permit_type)

        color_mapper = LinearColorMapper(palette=Palette,
                                         low=plot_info['min_count'],
                                         high=plot_info['max_count'])
        clean_title = f"{permit_type.replace('PERMIT - ', '').title()} - {year}"

        p = figure(
            title=clean_title,
            tools='hover,zoom_in,zoom_out',
            x_axis_location=None,
            y_axis_location=None,
            tooltips=[("Zip Code", "@zip"),
                      (clean_title, f"@{plot_info['target_field']}")],
        )

        p.grid.grid_line_color = None
        p.hover.point_policy = "follow_mouse"

        p.patches('xs',
                  'ys',
                  source=plot_info['column_data_source'],
                  fill_color={
                      'field': f"{plot_info['target_field']}",
                      'transform': color_mapper
                  },
                  fill_alpha=0.7,
                  line_color="white",
                  line_width=0.5)

        color_bar = ColorBar(color_mapper=color_mapper,
                             ticker=AdaptiveTicker(),
                             border_line_color=None,
                             location=(0, 0))

        p.outline_line_color = None

        p.add_layout(color_bar, 'right')
        p.toolbar.logo = None
        p.toolbar_location = None

        return p
Example #8
def plot_heatmap(col):
    '''col is a column name in calibdf, which can be
    R1R2_sum, residuals, GEAR_CALIB_T, or GEAR_CALIB_P'''
    # preset data for each quantity to be plotted
    names = {'R1R2_sum': 'R1+R2', 'residuals': 'RMS residuals',
             'GEAR_CALIB_T': 'Gear ratio θ', 'GEAR_CALIB_P': 'Gear ratio φ'}
    units = {'R1R2_sum': ' / mm', 'residuals': ' / mm',
             'GEAR_CALIB_T': '', 'GEAR_CALIB_P': ''}
    lims = {'R1R2_sum': (5.5, 6.5), 'residuals': (0, 0.02),
            'GEAR_CALIB_T': (0.8, 1.2), 'GEAR_CALIB_P': (0.8, 1.2)}
    # begin plot
    data, calibdf = pcm.data, pcm.calibdf
    name, unit, lim = names[col], units[col], lims[col]
    tooltips = ([('cursor obsXY', '($x, $y)')]
                + [(col, '@'+col) for col in filter_cols(calibdf.columns)])
    heatmap = figure(
        title=f'{name}, expid {data.expid}, {data.mode}',
        tools='pan,wheel_zoom,reset,hover,save', tooltips=tooltips,
        frame_width=400, frame_height=400,
        x_range=(-420, 420), y_range=(-420, 420))
    heatmap.xaxis.axis_label = 'obsX / mm'
    heatmap.yaxis.axis_label = 'obsY / mm'
    heatmap.hover.show_arrow = True
    # low = calibdf[quantity].min(skipna=True)
    # high = calibdf[quantity].max(skipna=True)
    color_mapper = LinearColorMapper(
        palette=Magma256, low=lim[0], high=lim[1])
    heatmap_src = ColumnDataSource(calibdf)
    heatmap.circle(
        x='obs_x', y='obs_y', source=heatmap_src, radius=5,
        fill_color={'field': col, 'transform': color_mapper},
        fill_alpha=0.7, line_color='white', line_width=1.8,
        hover_line_color='black')
    colorbar = ColorBar(
        title=name+unit, color_mapper=color_mapper, ticker=AdaptiveTicker(),
        orientation='horizontal',
        padding=5, location=(0, 0), height=10, width=390)
    heatmap.add_layout(colorbar, place='above')  # above
    return heatmap, heatmap_src
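Calling it is a one-liner; a hedged sketch, assuming the module-level pcm object the function reads from is already populated:

from bokeh.plotting import show

heatmap, heatmap_src = plot_heatmap('GEAR_CALIB_T')
show(heatmap)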
Example #9
def create_daily_res_plot(res_forecast, load_forecast):
    """
    Graph the res injection forecast.

    Arguments:
        res_forecast (list): list of renewable energy injection forecast
        load_forecast (list): list of load forecast
    """
    # Datetime range
    time_of_day = []

    # Create x-axis
    # beginning of day
    today = datetime.datetime.today()
    beginning_of_day = datetime.datetime(year=today.year,
                                         month=today.month,
                                         day=today.day)

    for i in range(len(res_forecast)):
        time_of_day.append(beginning_of_day +
                           datetime.timedelta(minutes=i * 30))

    # Compute 75 percentile
    percentile = np.percentile(res_forecast, 75)

    # Initialize dictionaries
    normal_dict = {'x': [], 'y': [], 'percentage': []}
    peak_dict = {'x': [], 'y': [], 'percentage': []}

    for i in range(len(res_forecast)):
        if res_forecast[i] >= percentile:
            peak_dict['x'].append(time_of_day[i])
            peak_dict['y'].append(res_forecast[i])
            peak_dict['percentage'].append(
                percentage_of(res_forecast[i], load_forecast[i]))
        else:
            normal_dict['x'].append(time_of_day[i])
            normal_dict['y'].append(res_forecast[i])
            normal_dict['percentage'].append(
                percentage_of(res_forecast[i], load_forecast[i]))

    # Hover tool to properly display time of day and value on hover
    hover = HoverTool(
        tooltips=[("Time of day", "@x{%H:%M}"), ("Forecast Value", "@y MWh"),
                  ("Percentage of Daily Load", "@percentage{1.11} %")],
        formatters={'@x': 'datetime'},
    )

    # Create the figure
    plot = figure(
        x_axis_label="Time of Day",
        y_axis_label="Megawatt Hours (MWh)",
        x_axis_type='datetime',
        sizing_mode="stretch_width",
        tools=[
            hover,
            BoxZoomTool(),
            ResetTool(),
            LassoSelectTool(),
            WheelZoomTool(),
            PanTool(),
            SaveTool()
        ],
    )

    plot.xaxis.formatter = DatetimeTickFormatter(
        minutes=["%H:%M"],
        hours=["%H:%M"],
    )

    # Set x-range and y-range
    plot.y_range = Range1d(min(res_forecast) - 200, max(res_forecast) + 100)
    plot.x_range = Range1d(time_of_day[0] - datetime.timedelta(minutes=5),
                           time_of_day[-1] + datetime.timedelta(minutes=5))

    # Set a grid
    plot.grid.minor_grid_line_color = '#eeeeee'

    # Set the font and style of labels
    plot.axis.axis_label_text_font = "raleway"
    plot.axis.axis_label_text_font_style = "normal"

    # Set the font of ticks on the axis
    plot.axis.major_label_text_font = "raleway"

    # Set the desired ticks
    plot.xaxis.ticker = DatetimeTicker(desired_num_ticks=24)
    plot.yaxis.ticker = AdaptiveTicker(desired_num_ticks=20)

    # Add a line plot
    plot.line(time_of_day,
              res_forecast,
              line_alpha=0.2,
              color="#264b01",
              line_width=1.5)

    # Add two circle plots: one for the normal values and one for those that
    # are at or above the 75th percentile
    plot.circle('x', 'y', source=normal_dict, size=8, color="#264b01")
    plot.circle('x', 'y', source=peak_dict, size=15, color="#264b01")

    return components(plot)
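A minimal sketch of calling this function with synthetic half-hourly values (48 points for one day); the numbers are made up, and the percentage_of helper the function relies on is assumed to exist in the module.

import random

res_forecast = [random.uniform(100, 500) for _ in range(48)]
load_forecast = [random.uniform(800, 1200) for _ in range(48)]
# components() returns the (script, div) pair to embed in a page.
script, div = create_daily_res_plot(res_forecast, load_forecast)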
Example #10
def show_data_visualization(source):
    """Show the data visualization in a webpage."""
    # Set up the plot window
    # Another map tile option: Vendors.STAMEN_TERRAIN
    tile_provider = get_provider(Vendors.CARTODBPOSITRON)
    sf_lat = (37.73, 37.81)
    sf_long = (-122.47, -122.359720)
    sf_xrange = [long_to_merc(long) for long in sf_long]
    sf_yrange = [lat_to_merc(lat) for lat in sf_lat]
    plot_options = dict(plot_width=1000, plot_height=800, title='Hourly Net Change in Bikes Docked')
    p = figure(x_range=sf_xrange, y_range=sf_yrange,
               x_axis_type="mercator", y_axis_type="mercator",
               tooltips=[("Net Change", "@net_change"), ("ID", "@id"), ("Station", "@name")],
               **plot_options)
    p.add_tile(tile_provider)
    # Add a color bar
    palette = RdBu[11]
    palette.reverse()
    color_mapper = LinearColorMapper(palette=palette, low=-30, high=30)
    color_bar = ColorBar(color_mapper=color_mapper, ticker=AdaptiveTicker(),
                         label_standoff=12, border_line_color=None, location=(0,0))
    p.add_layout(color_bar, 'right')
    # Add the station points as circles
    p.circle(x='x', y='y', size=15,
             fill_color={'field': 'net_change', 'transform': color_mapper},
             fill_alpha=0.8, source=source,
             )
    # add two sliders: one for date, one for hour
    start_date, end_date = datetime.date(2019,9,1), datetime.date(2019,9,30)
    date_fmt = '%Y%m%d'
    # For simplicity, the dates are converted to ints so the slider works here
    date_slider = Slider(start=int(start_date.strftime(date_fmt)), end=int(end_date.strftime(date_fmt)), step=1, value=int(start_date.strftime(date_fmt)), title='Date')
    hour_slider = Slider(start=0, end=23, value=9, step=1, title="Hour of Day")
    date_callback = CustomJS(args=dict(source=source), code="""
        var data = source.data;
        var curr_date = cb_obj.value;
        data['curr_date'][0] = curr_date;  // keep the stored date in sync for the hour slider
        data['net_change'] = data[curr_date + ' ' + data['curr_hr'][0]];
        source.change.emit();
    """)
    hour_callback = CustomJS(args=dict(source=source), code="""
        var data = source.data;
        function pad(n, width, z) {
          z = z || '0';
          n = n + '';
          return n.length >= width ? n : new Array(width - n.length + 1).join(z) + n;
        }
        var curr_hr = String(cb_obj.value).padStart(2, '0');
        data['curr_hr'][0] = curr_hr;
        data['net_change'] = data[data['curr_date'][0] + ' ' + curr_hr];
        source.change.emit();
    """)
    output_file("net_bikes.html")
    date_slider.js_on_change('value', date_callback)
    hour_slider.js_on_change('value', hour_callback)
    # Display on the page
    show(
        column(
            row(
                widgetbox(date_slider),
                widgetbox(hour_slider),
            ),
            p
        )
    )
Example #11
def generate_heatmap(data, keywords):
    colors = heatmap_colors
    colorkey = 'red-blue'
    data = data.sort_values(by='pointtimestamp')

    if 'color' in keywords:
        if keywords['color'] in colors:
            colorkey = keywords['color']
    mapper = LinearColorMapper(palette=colors[colorkey],
                               low=data['pointvalue'].min(),
                               high=data['pointvalue'].max())

    if data['pointvalue'].min() == data['pointvalue'].max():
        mapper = LinearColorMapper(palette=colors[colorkey],
                                   low=data['pointvalue'].min() - 1,
                                   high=data['pointvalue'].max() + 1)

    source = ColumnDataSource(data)
    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
    p = figure(title=data['pointname'][0],
               y_range=list(reversed(data['date'].unique())),
               x_range=list(data['time'].unique()),
               x_axis_location="above",
               plot_width=1000,
               plot_height=700,
               tools=TOOLS,
               toolbar_location='below',
               sizing_mode='scale_width')
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "5pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = 3.14 / 3
    p.xgrid.grid_line_color = None

    p.rect(x="time",
           y="date",
           width=1,
           height=.95,
           source=source,
           fill_color={
               'field': 'pointvalue',
               'transform': mapper
           },
           line_color=None)

    p.select_one(HoverTool).tooltips = [
        ('date', '@date @time'),
        ('pointvalue', '@pointvalue ' + data['units'][0]),
    ]

    color_bar = ColorBar(color_mapper=mapper,
                         ticker=AdaptiveTicker(),
                         formatter=PrintfTickFormatter(format="%d " +
                                                       data['units'][0]),
                         label_standoff=15,
                         location=(0, 0))

    p.add_layout(color_bar, 'right')
    script, plot = components(p)
    color_picker = "Colors: <select class='color-picker' name='color'>"
    for color in colors:
        selected = ''
        if colorkey == color:
            selected = 'selected'
        color_picker += ("<option value='" + color + "' " + selected + ">" +
                         color.title() + "</option>")
    color_picker += "</select>"
    plot = color_picker + plot
    return script, plot  # Embed figure in template
Example #12
    def get_time_charts(self, time_selector, suffix, width=600, height=350):
        charts = []

        selector = time_selector(self.metrics['completion'])
        if not any(selector):
            return charts

        # hourly throughput
        s1 = figure(width=width,
                    height=height,
                    x_axis_type='datetime',
                    title='hourly throughput' + suffix)
        s1.legend.orientation = 'bottom_left'
        s1.circle(self.metrics[selector & self.completed]['completion'],
                  self.metrics[selector & self.completed]['throughput'],
                  color='blue',
                  alpha=0.2,
                  size=12,
                  legend='hourly throughput')
        peak = Span(location=self.metrics[selector]['throughput'].max(),
                    dimension='width',
                    line_color='green',
                    line_dash='dashed',
                    line_width=3)
        s1.renderers.extend([peak])
        charts.append(s1)

        # upload size / pulp upload time
        s2 = figure(width=width,
                    height=height,
                    title='upload size vs pulp upload time' + suffix)
        s2.xaxis.axis_label = 'Time uploading to pulp'
        s2.yaxis.axis_label = 'upload size (MB)'
        s2.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
        s2.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        s2.square(self.metrics[selector]['plugin_pulp_push'],
                  self.metrics[selector]['upload_size_mb'],
                  color='orange',
                  alpha=0.2,
                  size=12)
        charts.append(s2)

        # concurrent builds
        s3 = figure(width=width,
                    height=height,
                    title='concurrent builds' + suffix,
                    x_axis_type='datetime')
        which_c = time_selector(self.concurrent['timestamp'])
        s3.line(self.concurrent[which_c]['timestamp'],
                self.concurrent[which_c]['nbuilds'],
                line_color='green',
                line_join='bevel')
        charts.append(s3)

        # squash time vs concurrent builds
        merged = self.metrics[selector].merge(self.concurrent[which_c],
                                              left_on=['completion'],
                                              right_on=['timestamp'],
                                              sort=False)
        sc = BoxPlot(merged,
                     values='plugin_squash',
                     label='nbuilds',
                     width=width,
                     height=height,
                     title='squash time vs (other) concurrent builds' + suffix)
        sc._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
        sc._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        charts.append(sc)

        # upload_size_mb
        valid = ~np.isnan(self.metrics['upload_size_mb'])
        hsize = MyHistogram(self.metrics['upload_size_mb'][selector][valid],
                            bins=10,
                            title='Upload size' + suffix,
                            plot_width=width,
                            plot_height=height)
        hsize.xaxis.axis_label = 'Mb'
        charts.append(hsize)

        # running time by plugin
        these_metrics = self.metrics[selector]
        for column, bins, title in [
            ('running', None, 'Total build time' + suffix),
            ('plugin_pull_base_image', 15, 'Time pulling base image' + suffix),
            ('plugin_distgit_fetch_artefacts', None,
             'Time fetching sources' + suffix),
            ('docker_build', None, 'Time in docker build' + suffix),
            ('plugin_squash', None, 'Time squashing layers' + suffix),
            ('plugin_pulp_push', None, 'Time uploading to pulp' + suffix),
        ]:
            values = these_metrics[column][~np.isnan(these_metrics[column])]
            h = MyHistogram(values,
                            title=title,
                            x_axis_type='datetime',
                            bins=bins or 10,
                            plot_width=width,
                            plot_height=height)
            h.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
            h.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
            h.yaxis.bounds = (0, len(these_metrics))
            charts.append(h)

        # Now show plugin-level timings for a specific image
        # data looks like:
        # completion  image       plugin_x  plugin_y
        # 2016-03-18  image/name    205       60
        #
        # reshape to:
        # image       plugin      value
        # image/name  plugin_x    205
        # image/name  plugin_y    60
        if self.image:
            is_image = self.metrics[selector]['image'] == self.image
            image = self.metrics[selector][is_image]
            timings = pd.melt(image[[
                'image', 'running', 'plugin_pull_base_image',
                'plugin_distgit_fetch_artefacts', 'docker_build',
                'plugin_squash', 'plugin_compress', 'plugin_pulp_push'
            ]],
                              id_vars=['image'],
                              var_name='plugin')
            im = BoxPlot(timings,
                         values='value',
                         label='plugin',
                         width=width,
                         height=height * 2,
                         title='%s timings%s' % (self.image, suffix))
            im._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
            im._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
            charts.append(im)

        return charts
Example #13
    def simulate(self,
                 agents: List[Agent],
                 baseline_accuracy: float = None,
                 init_train_data_portion: float = 0.1,
                 pm_test_sets: list = None,
                 accuracy_plot_wait_s=2E5,
                 train_size: int = None, test_size: int = None,
                 ):
        """
        Run a simulation.

        :param agents: The agents that will interact with the data.
        :param baseline_accuracy: The baseline accuracy of the model.
            Usually the accuracy on a hidden test set when the model is trained with all data.
        :param init_train_data_portion: The portion of the data to initially use for training. Must be in [0, 1].
        :param pm_test_sets: The test sets for the prediction market incentive mechanism.
        :param accuracy_plot_wait_s: The amount of time to wait in seconds between plotting the accuracy.
        :param train_size: The amount of training data to use.
        :param test_size: The amount of test data to use.
        """

        assert 0 <= init_train_data_portion <= 1

        # Data to save.
        save_data = dict(agents=[asdict(a) for a in agents],
                         baselineAccuracy=baseline_accuracy,
                         initTrainDataPortion=init_train_data_portion,
                         accuracies=[],
                         balances=[],
                         )
        time_for_filenames = int(time.time())
        save_path = f'saved_runs/{time_for_filenames}.json'
        plot_save_path = f'saved_runs/{time_for_filenames}_plot.png'
        self._logger.info("Saving run info to \"%s\".", save_path)
        os.makedirs(os.path.dirname(save_path), exist_ok=True)

        # Set up plots.
        doc: Document = curdoc()
        doc.title = "DeCAI Simulation"

        plot = figure(title="Balances & Accuracy on Hidden Test Set",
                      )
        plot.width = 800
        plot.height = 600

        plot.xaxis.axis_label = "Time (days)"
        plot.yaxis.axis_label = "Percent"
        plot.title.text_font_size = '20pt'
        plot.xaxis.major_label_text_font_size = '20pt'
        plot.xaxis.axis_label_text_font_size = '20pt'
        plot.yaxis.major_label_text_font_size = '20pt'
        plot.yaxis.axis_label_text_font_size = '20pt'

        plot.xaxis[0].ticker = AdaptiveTicker(base=5 * 24 * 60 * 60)
        plot.xgrid[0].ticker = AdaptiveTicker(base=24 * 60 * 60)

        balance_plot_sources_per_agent = dict()
        good_colors = cycle([
            colors.named.green,
            colors.named.lawngreen,
            colors.named.darkgreen,
            colors.named.limegreen,
        ])
        bad_colors = cycle([
            colors.named.red,
            colors.named.darkred,
        ])
        for agent in agents:
            source = ColumnDataSource(dict(t=[], b=[]))
            assert agent.address not in balance_plot_sources_per_agent
            balance_plot_sources_per_agent[agent.address] = source
            if agent.calls_model:
                color = 'blue'
                line_dash = 'dashdot'
            elif agent.good:
                color = next(good_colors)
                line_dash = 'dotted'
            else:
                color = next(bad_colors)
                line_dash = 'dashed'
            plot.line(x='t', y='b',
                      line_dash=line_dash,
                      line_width=2,
                      source=source,
                      color=color,
                      legend=f"{agent.address} Balance")

        plot.legend.location = 'top_left'
        plot.legend.label_text_font_size = '12pt'

        # JavaScript code.
        plot.xaxis[0].formatter = FuncTickFormatter(code="""
        return (tick / 86400).toFixed(0);
        """)
        plot.yaxis[0].formatter = PrintfTickFormatter(format="%0.1f%%")

        acc_source = ColumnDataSource(dict(t=[], a=[]))
        if baseline_accuracy is not None:
            plot.ray(x=[0], y=[baseline_accuracy * 100], length=0, angle=0, line_width=2,
                     legend=f"Accuracy when trained with all data: {baseline_accuracy * 100:0.1f}%")
        plot.line(x='t', y='a',
                  line_dash='solid',
                  line_width=2,
                  source=acc_source,
                  color='black',
                  legend="Current Accuracy")

        @gen.coroutine
        def plot_cb(agent: Agent, t, b):
            source = balance_plot_sources_per_agent[agent.address]
            source.stream(dict(t=[t], b=[b * 100 / agent.start_balance]))
            save_data['balances'].append(dict(t=t, a=agent.address, b=b))

        @gen.coroutine
        def plot_accuracy_cb(t, a):
            acc_source.stream(dict(t=[t], a=[a * 100]))
            save_data['accuracies'].append(dict(t=t, accuracy=a))

        continuous_evaluation = not isinstance(self._decai.im, PredictionMarket)

        def task():
            (x_train, y_train), (x_test, y_test) = \
                self._data_loader.load_data(train_size=train_size, test_size=test_size)
            init_idx = int(len(x_train) * init_train_data_portion)
            self._logger.info("Initializing model with %d out of %d samples.",
                              init_idx, len(x_train))
            x_init_data, y_init_data = x_train[:init_idx], y_train[:init_idx]
            x_remaining, y_remaining = x_train[init_idx:], y_train[init_idx:]

            self._decai.model.init_model(x_init_data, y_init_data)

            if self._logger.isEnabledFor(logging.DEBUG):
                s = self._decai.model.evaluate(x_init_data, y_init_data)
                self._logger.debug("Initial training data evaluation: %s", s)
                s = self._decai.model.evaluate(x_remaining, y_remaining)
                self._logger.debug("Remaining training data evaluation: %s", s)

            self._logger.info("Evaluating initial model.")
            accuracy = self._decai.model.evaluate(x_test, y_test)
            self._logger.info("Initial test set accuracy: %0.2f%%", accuracy * 100)
            t = self._time()
            doc.add_next_tick_callback(
                partial(plot_accuracy_cb, t=t, a=accuracy))

            q = PriorityQueue()
            random.shuffle(agents)
            for agent in agents:
                self._balances.initialize(agent.address, agent.start_balance)
                q.put((self._time() + agent.get_next_wait_s(), agent))
                doc.add_next_tick_callback(
                    partial(plot_cb, agent=agent, t=t, b=agent.start_balance))

            unclaimed_data = []
            next_data_index = 0
            next_accuracy_plot_time = 1E4
            desc = "Processing agent requests"
            with tqdm(desc=desc,
                      unit_scale=True, mininterval=2, unit=" requests",
                      total=len(x_remaining),
                      ) as pbar:
                while not q.empty():
                    # For now assume sending a transaction (editing) is free (no gas)
                    # since it should be relatively cheaper than the deposit required to add data.
                    # It may not be cheaper than calling `report`.

                    if next_data_index >= len(x_remaining):
                        if not continuous_evaluation or len(unclaimed_data) == 0:
                            break

                    current_time, agent = q.get()
                    update_balance_plot = False
                    if current_time > next_accuracy_plot_time:
                        self._logger.debug("Evaluating.")
                        next_accuracy_plot_time += accuracy_plot_wait_s
                        accuracy = self._decai.model.evaluate(x_test, y_test)
                        doc.add_next_tick_callback(
                            partial(plot_accuracy_cb, t=current_time, a=accuracy))

                        if continuous_evaluation:
                            self._logger.debug("Unclaimed data: %d", len(unclaimed_data))
                            pbar.set_description(f"{desc} ({len(unclaimed_data)} unclaimed)")

                        with open(save_path, 'w') as f:
                            json.dump(save_data, f, separators=(',', ':'))

                        if os.path.exists(plot_save_path):
                            os.remove(plot_save_path)
                        export_png(plot, plot_save_path)

                    self._time.set_time(current_time)

                    balance = self._balances[agent.address]
                    if balance > 0 and next_data_index < len(x_remaining):
                        # Pick data.
                        x, y = x_remaining[next_data_index], y_remaining[next_data_index]

                        if agent.calls_model:
                            # Only call the model if it's good.
                            if random.random() < accuracy:
                                update_balance_plot = True
                                self._decai.predict(Msg(agent.address, agent.pay_to_call), x)
                        else:
                            if not agent.good:
                                y = 1 - y
                            if agent.prob_mistake > 0 and random.random() < agent.prob_mistake:
                                y = 1 - y

                            # Bad agents always contribute.
                            # Good agents will only work if the model is doing well.
                            # Add a bit of chance they will contribute since 0.85 accuracy is okay.
                            if not agent.good or random.random() < accuracy + 0.15:
                                value = agent.get_next_deposit()
                                if value > balance:
                                    value = balance
                                msg = Msg(agent.address, value)
                                try:
                                    self._decai.add_data(msg, x, y)
                                    # Don't need to plot every time. Plot less as we get more data.
                                    update_balance_plot = next_data_index / len(x_remaining) + 0.1 < random.random()
                                    balance = self._balances[agent.address]
                                    if continuous_evaluation:
                                        unclaimed_data.append((current_time, agent, x, y))
                                    next_data_index += 1
                                    pbar.update()
                                except RejectException:
                                    # Probably failed because they didn't pay enough which is okay.
                                    # Or if not enough time has passed since data was attempted to be added
                                    # which is okay too because a real contract would reject this
                                    # because the smallest unit of time we can use is 1s.
                                    if self._logger.isEnabledFor(logging.DEBUG):
                                        self._logger.exception("Error adding data.")

                    if balance > 0:
                        q.put((current_time + agent.get_next_wait_s(), agent))

                    claimed_indices = []
                    for i in range(len(unclaimed_data)):
                        added_time, adding_agent, x, classification = unclaimed_data[i]
                        if current_time - added_time < self._decai.im.refund_time_s:
                            break
                        if next_data_index >= len(x_remaining) \
                                and current_time - added_time < self._decai.im.any_address_claim_wait_time_s:
                            break
                        balance = self._balances[agent.address]
                        msg = Msg(agent.address, balance)

                        if current_time - added_time > self._decai.im.any_address_claim_wait_time_s:
                            # Attempt to take the entire deposit.
                            try:
                                self._decai.report(msg, x, classification, added_time, adding_agent.address)
                                update_balance_plot = True
                            except RejectException:
                                if self._logger.isEnabledFor(logging.DEBUG):
                                    self._logger.exception("Error taking reward.")
                        elif adding_agent.address == agent.address:
                            try:
                                self._decai.refund(msg, x, classification, added_time)
                                update_balance_plot = True
                            except RejectException:
                                if self._logger.isEnabledFor(logging.DEBUG):
                                    self._logger.exception("Error getting refund.")
                        else:
                            try:
                                self._decai.report(msg, x, classification, added_time, adding_agent.address)
                                update_balance_plot = True
                            except RejectException:
                                if self._logger.isEnabledFor(logging.DEBUG):
                                    self._logger.exception("Error taking reward.")

                        stored_data = self._decai.data_handler.get_data(x, classification,
                                                                        added_time, adding_agent.address)
                        if stored_data.claimable_amount <= 0:
                            claimed_indices.append(i)

                    for i in claimed_indices[::-1]:
                        unclaimed_data.pop(i)

                    if update_balance_plot:
                        balance = self._balances[agent.address]
                        doc.add_next_tick_callback(
                            partial(plot_cb, agent=agent, t=current_time, b=balance))

            self._logger.info("Done going through data.")
            if continuous_evaluation:
                pbar.set_description(f"{desc} ({len(unclaimed_data)} unclaimed)")

            if isinstance(self._decai.im, PredictionMarket):
                self._time.add_time(agents[0].get_next_wait_s())
                self._decai.im.end_market()
                for i, test_set_portion in enumerate(pm_test_sets):
                    if i != self._decai.im.test_reveal_index:
                        self._decai.im.verify_next_test_set(test_set_portion)
                with tqdm(desc="Processing contributions",
                          unit_scale=True, mininterval=2, unit=" contributions",
                          total=self._decai.im.get_num_contributions_in_market(),
                          ) as pbar:
                    finished_first_round_of_rewards = False
                    while self._decai.im.remaining_bounty_rounds > 0:
                        self._time.add_time(agents[0].get_next_wait_s())
                        self._decai.im.process_contribution()
                        pbar.update()

                        if not finished_first_round_of_rewards:
                            accuracy = self._decai.im.prev_acc
                            # If we plot too often then we end up with a blob instead of a line.
                            if random.random() < 0.1:
                                doc.add_next_tick_callback(
                                    partial(plot_accuracy_cb, t=self._time(), a=accuracy))

                        if self._decai.im.state == MarketPhase.REWARD_RESTART:
                            finished_first_round_of_rewards = True
                            if self._decai.im.reset_model_during_reward_phase:
                                # Update the accuracy after resetting all data.
                                accuracy = self._decai.im.prev_acc
                            else:
                                # Use the accuracy after training with all data.
                                pass
                            doc.add_next_tick_callback(
                                partial(plot_accuracy_cb, t=self._time(), a=accuracy))
                            pbar.total += self._decai.im.get_num_contributions_in_market()
                            self._time.add_time(self._time() * 0.001)

                            for agent in agents:
                                balance = self._balances[agent.address]
                                market_bal = self._decai.im._market_balances[agent.address]
                                self._logger.debug("\"%s\" market balance: %0.2f   Balance: %0.2f",
                                                   agent.address, market_bal, balance)
                                doc.add_next_tick_callback(
                                    partial(plot_cb, agent=agent, t=self._time(), b=max(balance + market_bal, 0)))

                self._time.add_time(self._time() * 0.02)
                for agent in agents:
                    msg = Msg(agent.address, 0)
                    # Find data submitted by them.
                    data = None
                    for key, stored_data in self._decai.data_handler:
                        if stored_data.sender == agent.address:
                            data = key[0]
                            break
                    if data is not None:
                        self._decai.refund(msg, np.array(data), stored_data.classification, stored_data.time)
                        balance = self._balances[agent.address]
                        doc.add_next_tick_callback(
                            partial(plot_cb, agent=agent, t=self._time(), b=balance))
                        self._logger.info("Balance for \"%s\": %.2f (%+.2f%%)",
                                          agent.address, balance,
                                          (balance - agent.start_balance) / agent.start_balance * 100)
                    else:
                        self._logger.warning("No data submitted by \"%s\" was found."
                                             "\nWill not update its balance.", agent.address)

                self._logger.info("Done issuing rewards.")

            accuracy = self._decai.model.evaluate(x_test, y_test)
            doc.add_next_tick_callback(
                partial(plot_accuracy_cb, t=current_time, a=accuracy))

            with open(save_path, 'w') as f:
                json.dump(save_data, f, separators=(',', ':'))

            if os.path.exists(plot_save_path):
                os.remove(plot_save_path)
            export_png(plot, plot_save_path)

        doc.add_root(plot)
        thread = Thread(target=task)
        thread.start()
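The time-axis handling above, in isolation: the simulation clock is in seconds, the AdaptiveTicker picks major ticks from multiples of a five-day base, and the FuncTickFormatter renders each tick as whole days. A standalone sketch under those assumptions:

from bokeh.models import AdaptiveTicker, FuncTickFormatter
from bokeh.plotting import figure, show

p = figure(plot_width=400, plot_height=300)
p.line([0, 30 * 24 * 60 * 60], [0, 100])  # x axis in seconds, spanning 30 days
p.xaxis.ticker = AdaptiveTicker(base=5 * 24 * 60 * 60)
p.xaxis.formatter = FuncTickFormatter(code="return (tick / 86400).toFixed(0);")
show(p)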
Example #14
    def create_figure5():
        xsp5 = dfp5[x.value].values
        ysp5 = dfp5[y.value].values
        x_titlep5 = x.value.title()
        y_titlep5 = y.value.title()

        kwp5 = dict()
        if x.value in discrete:
            kwp5['x_range'] = sorted(set(xsp5))
        if y.value in discrete:
            kwp5['y_range'] = sorted(set(ysp5))
        kwp5['title'] = "%s vs %s" % (
            x_titlep5, y_titlep5) + " for {} on {} and {}".format(
                BotName, Par, H4)

        pp5 = figure(plot_height=500,
                     plot_width=800,
                     tools='pan,box_zoom,hover,reset,lasso_select',
                     **kwp5)
        pp5.xaxis.axis_label = x_titlep5
        pp5.yaxis.axis_label = y_titlep5

        if x.value in discrete:
            pp5.xaxis.major_label_orientation = pd.np.pi / 4

        sz = 9
        if size.value != 'None':
            if len(set(dfp5[size.value])) > N_SIZES:
                groups = pd.qcut(dfp5[size.value].values,
                                 N_SIZES,
                                 duplicates='drop')
            else:
                groups = pd.Categorical(dfp5[size.value])
            sz = [SIZES[xx] for xx in groups.codes]

        c = "#31AADE"
        if color.value != 'None':
            if len(set(dfp5[color.value])) > N_COLORS:
                groups = pd.qcut(dfp5[color.value].values,
                                 N_COLORS,
                                 duplicates='drop')
            else:
                groups = pd.Categorical(dfp5[color.value])
            c = [COLORS[xx] for xx in groups.codes]

        # COLOR BAR NEXT TO GRAPHIC
        #PAIR 3
        Var_color_mapper = LinearColorMapper(
            palette="Inferno256",
            low=min(dfp5['Profit']),
            high=max(dfp5['Profit']))  # TODO: fix the max and min so they pick up the actual values
        # Var_color_mapper = LinearColorMapper(palette="Inferno256",low=min(dfp1[color.value]),high=max(dfp1[color.value]))  # TODO: fix the max and min so they pick up the actual values
        GraphTicker = AdaptiveTicker(base=50,
                                     desired_num_ticks=10,
                                     num_minor_ticks=20,
                                     max_interval=1000)
        Color_legend = ColorBar(color_mapper=Var_color_mapper,
                                ticker=GraphTicker,
                                label_standoff=12,
                                border_line_color=None,
                                location=(0, 0))
        pp5.circle(x=xsp5,
                   y=ysp5,
                   color=c,
                   size=sz,
                   line_color="white",
                   alpha=0.6,
                   hover_color='white',
                   hover_alpha=0.5)
        pp5.add_layout(Color_legend, 'right')
        return pp5
Example #15
def city_stats(request, city):
    """View that renders graphs for case counts in a specific city."""
    city = city.replace('-', ' ').title()
    if city.lower() == 'coto de caza':
        city = 'Coto de Caza'
    all_cities, data_item, dataset = _get_covid_info()
    date_series = dataset['DateSpecCollect']
    case_count_series = dataset[city]
    total_series = dataset['Total']

    # Create a bokeh graph for positive cases by day over the past 2 months.
    days_back = 60
    counts_by_day_plot = figure(title=f'Positive cases by day over the last 60 days -- {case_count_series[days_back * -1:].sum()} total cases.',
                  y_range=date_series[days_back * -1:],
                  plot_width=700,
                  plot_height=days_back * 20,
                  tools="save",
                  x_axis_label="Positive Cases Reported",
                  x_axis_location='above',
                  x_minor_ticks=2)
    counts_by_day_plot.hbar(y=date_series,
              right=case_count_series,
              left=0,
              height=0.4,
              color=RGB(79, 70, 229),
              fill_alpha=0.5,
              line_cap='round',
              hatch_alpha=0.0)
    hbar_script, hbar_div = components(counts_by_day_plot)

    # Create a bokeh line graph plotting total case counts over the past 2 weeks.
    total_past_2_weeks_plot = figure(x_range=date_series[-14:],
                       plot_width=500,
                       plot_height=300,
                       y_axis_label="Total case counts",
                       tools="save",
                       title=f'Total case counts over the last 14 days -- {case_count_series[-14:].sum()} new cases.')
    total_past_2_weeks_plot.xaxis.major_label_orientation = 45
    total_past_2_weeks_plot.line(x=date_series, y=case_count_series.cumsum())
    recent_line_script, recent_line_div = components(total_past_2_weeks_plot)

    # Create bokeh line graph showing total case counts since the start of pandemic.
    total_all_time_plot = figure(x_range=date_series,
                           plot_width=500,
                           plot_height=300,
                           y_axis_label="Total case counts",
                           tools="save",
                           title='Total case counts since the beginning of the pandemic.')
    total_all_time_plot.xaxis.visible = False
    total_all_time_plot.xaxis.ticker = AdaptiveTicker(desired_num_ticks=10)
    total_all_time_plot.line(x=date_series, y=case_count_series.cumsum())
    all_line_script, all_line_div = components(total_all_time_plot)

    # Create a bokeh bar graph showing the percentage of total case by day.
    days_back = 14
    percentage_series = (dataset[city]/dataset['Total']) * 100
    percentage_total_plot = figure(title=f'Percentage of new cases reported in OC attributed to {city}.',
                                   x_range=date_series[-14:],
                                   plot_width=500,
                                   plot_height=500,
                                   y_axis_label='% of new cases reported in the county',
                                   x_axis_label=f'* {city} is an estimated {round(population[city] / total_population * 100, 2)}% of total OC population.',
                                   tools='save')
    percentage_total_plot.vbar(x=date_series[-14:],
                               top=percentage_series[-14:],
                               bottom=0,
                               width=0.4,
                               color=RGB(79, 70, 229),
                               fill_alpha=0.5,
                               line_cap='round',
                               hatch_alpha=0.0)
    percentage_total_plot.xaxis.major_label_orientation = 45
    percentage_total_script, percentage_total_div = components(percentage_total_plot)


    context = {'city': city,
               'hbar_script': hbar_script,
               'hbar_div': hbar_div,
               'recent_line_script': recent_line_script,
               'recent_line_div': recent_line_div,
               'all_line_script': all_line_script,
               'all_line_div': all_line_div,
               'percentage_total_script': percentage_total_script,
               'percentage_total_div': percentage_total_div,
               'total_cases': case_count_series.sum(),
               'all_cities': all_cities,
               'last_updated': datetime.utcnow() - datetime.utcfromtimestamp(
                   data_item.modified / 1000)}
    return render(request, 'app/city_stats.html', context)
Example #16
def block_heatmap(df, height=600, width=900):
    """
    Generates a block-style heatmap from a DataFrame.

    :param df:
        The Pandas DataFrame to render in block-heatmap style.
    :return:
        A Bokeh block heatmap figure modeled after example code.  The figure has additional properties, df for
        the plot data, and rect for the plot object.
    """
    # this colormap blatantly copied from the New York Times.
    colors = [
        "#ADD8E6", "#9AC7E7", "#88B6E9", "#76A5EB", "#6495ED", "#647CD8",
        "#6564C3", "#654BAE", "#663399"
    ]
    mapper = LinearColorMapper(palette=colors, low=0, high=1)
    cols = {i: c for (i, c) in enumerate(df.columns)}
    index = {i: r for (i, r) in enumerate(df.index)}
    cols_by_rows = product(enumerate(df.columns), enumerate(df.index))
    # Build rows as a plain Python list so pandas keeps numeric columns numeric
    # (np.array on mixed types would coerce every value to a string).
    data = [[x, y, c, r, df.loc[r, c]]
            for ((x, c), (y, r)) in cols_by_rows]
    combination_df = pd.DataFrame(
        data, columns=["gene_id", "sample_id", "gene", "sample", "value"])
    source = ColumnDataSource(combination_df)

    fig = figure(title="Clustered Heatmap",
                 toolbar_location="below",
                 x_range=(0, len(df.columns)),
                 y_range=(0, len(df.index)),
                 tools=["box_zoom", "pan", "reset", "save"],
                 name="heatmap",
                 x_axis_location="above",
                 plot_width=width,
                 plot_height=height,
                 active_drag="box_zoom")
    fig.rect(x="gene_id",
             y="sample_id",
             source=source,
             width=1,
             height=1,
             fill_color={
                 'field': 'value',
                 'transform': mapper
             },
             line_color=None)

    fig.grid.grid_line_color = None
    fig.axis.axis_line_color = None
    fig.axis.major_tick_line_color = None
    fig.axis.major_label_text_font_size = "7pt"
    fig.axis.major_label_standoff = 0
    fig.xaxis.major_label_orientation = np.pi / 3

    fig.yaxis.formatter = FuncTickFormatter(code="""
        var labels = %s;
        return labels[tick] || '';
    """ % index)

    fig.xaxis.formatter = FuncTickFormatter(code="""
        var labels = %s;
        return labels[tick] || '';
    """ % cols)

    fig.yaxis.ticker = FixedTicker(ticks=list(index.keys()))
    fig.xaxis.ticker = AdaptiveTicker(mantissas=list(range(10)),
                                      min_interval=1,
                                      max_interval=5)

    hover = HoverTool(names=["heatmap"])
    hover.tooltips = [('gene', '@gene'), ('sample', '@sample'),
                      ('percentile', '@value%')]
    fig.add_tools(hover)

    return fig
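
A minimal usage sketch (an assumption, not part of the original snippet): the color mapper above is fixed to low=0, high=1, so the demo values below stay in that range, and `demo_df` with its gene/sample labels is hypothetical. It assumes the snippet's own Bokeh and pandas imports are available in the module.

import pandas as pd
from bokeh.plotting import show

# Hypothetical percentile matrix: rows are samples, columns are genes.
demo_df = pd.DataFrame([[0.10, 0.85, 0.40],
                        [0.55, 0.20, 0.95]],
                       index=['sample_1', 'sample_2'],
                       columns=['gene_a', 'gene_b', 'gene_c'])

show(block_heatmap(demo_df, height=300, width=400))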
Example #17
0
    def create_figure():
        print(
            'this is Create Figure on Bokeh Interactive for a single chart on unfiltered Phase 1'
        )
        xs = df[x.value].values
        ys = df[y.value].values
        x_title = x.value.title()
        y_title = y.value.title()

        kw = dict()
        if x.value in discrete:
            kw['x_range'] = sorted(set(xs))
        if y.value in discrete:
            kw['y_range'] = sorted(set(ys))
        kw['title'] = "%s vs %s" % (
            x_title, y_title) + " for {} on {} and {}".format(BotName, i, j)

        p = figure(plot_height=900,
                   plot_width=1700,
                   tools='pan,box_zoom,hover,reset,lasso_select',
                   **kw)
        p.xaxis.axis_label = x_title
        p.yaxis.axis_label = y_title

        if x.value in discrete:
            p.xaxis.major_label_orientation = np.pi / 4  # pd.np was removed in newer pandas versions

        sz = 9
        if size.value != 'None':
            if len(set(df[size.value])) > N_SIZES:
                groups = pd.qcut(df[size.value].values,
                                 N_SIZES,
                                 duplicates='drop')
            else:
                groups = pd.Categorical(df[size.value])
            sz = [SIZES[xx] for xx in groups.codes]

        c = "#31AADE"
        if color.value != 'None':
            if len(set(df[color.value])) > N_COLORS:
                groups = pd.qcut(df[color.value].values,
                                 N_COLORS,
                                 duplicates='drop')
            else:
                groups = pd.Categorical(df[color.value])
            c = [COLORS[xx] for xx in groups.codes]

        Var_color_mapper = LinearColorMapper(
            palette="Inferno256",
            low=min(df['Profit']),
            high=max(df['Profit']
                     ))  # TODO: fix the max and min so they pick up the selected value
        #Var_color_mapper = LinearColorMapper(palette="Inferno256",low=min(df[color.value]),high=max(df[color.value]))  # TODO: fix the max and min so they pick up the selected value
        GraphTicker = AdaptiveTicker(base=50,
                                     desired_num_ticks=10,
                                     num_minor_ticks=20,
                                     max_interval=1000)
        Color_legend = ColorBar(
            color_mapper=Var_color_mapper,
            ticker=GraphTicker,
            label_standoff=12,
            border_line_color=None,
            location=(0, 0)
        )  # TODO: fix LogTicker so it ticks according to the color scale
        p.circle(x=xs,
                 y=ys,
                 color=c,
                 size=sz,
                 line_color="white",
                 alpha=0.1,
                 hover_color='white',
                 hover_alpha=0.1)
        p.add_layout(Color_legend, 'right')
        p.circle(x=xs,
                 y=ys,
                 color=c,
                 size=sz,
                 line_color="white",
                 alpha=0.1,
                 hover_color='white',
                 hover_alpha=0.1)
        return p
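
`create_figure` above closes over module-level widgets (`x`, `y`, `size`, `color`) and data (`df`, `SIZES`, `COLORS`, `BotName`), so it cannot run on its own. A self-contained sketch of just its ColorBar-with-AdaptiveTicker pattern, using hypothetical standalone data, might look like this:

import numpy as np
from bokeh.models import AdaptiveTicker, ColorBar, ColumnDataSource, LinearColorMapper
from bokeh.plotting import figure, show

# Hypothetical profit-like values to color the points by (assumption).
profit = np.random.uniform(-500, 1500, size=200)
source = ColumnDataSource(data=dict(x=np.arange(profit.size), y=profit))

color_mapper = LinearColorMapper(palette="Inferno256",
                                 low=float(profit.min()),
                                 high=float(profit.max()))
ticker = AdaptiveTicker(base=50,
                        desired_num_ticks=10,
                        num_minor_ticks=20,
                        max_interval=1000)

p = figure(plot_width=700, plot_height=400, tools='pan,box_zoom,reset')
p.circle(x='x', y='y', source=source, size=8, line_color='white',
         color={'field': 'y', 'transform': color_mapper})
p.add_layout(ColorBar(color_mapper=color_mapper, ticker=ticker,
                      label_standoff=12, border_line_color=None,
                      location=(0, 0)), 'right')
show(p)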
Example #18
0
def app():
    st.title('Analysis on Movies from 2011 - 2021')
    st.markdown(
        "<p><strong>Disclaimer:</strong> <em>This web application was created by Dustin Reyes.</em></p>",
        unsafe_allow_html=True)
    # st.write("This page incorporates analysis on movies from 2011 to 2021 and dashboards to visualize the insights that were observed.")
    st.markdown(
        """<p align="justify"><em>This page incorporates analysis on movies from 2011 to 2021 and dashboards to visualize the insights that were observed. 
                It must be noted that only movies with complete information, released in theaters, and with reliable sources are 
                considered for this analysis.</em></p>""",
        unsafe_allow_html=True)

    # df_movies = pd.read_csv('data/titles_complete_info.csv', usecols = cols)
    df_movies = df_movies_orig.copy()
    title_basics_df = title_basics_df_orig.copy()
    imdb_info_withbudget = imdb_info_withbudget_orig.copy()

    df_movies.dropna(subset=['worldwide_gross', 'metacritic_score'],
                     inplace=True)
    df_movies.reset_index(drop=True, inplace=True)
    df_movies.sort_values(by='release', inplace=True)
    df_movies.reset_index(drop=True, inplace=True)
    df_movies.rename(
        {
            'worldwide_gross': 'Worldwide Gross',
            'metacritic_score': 'Metacritic Score',
            'budget': 'Budget',
            'opening': 'Opening',
            'gross': 'Gross',
            'runtimeMinutes': 'Runtime (Minutes)',
            'averageRating': 'Average Rating',
            'numVotes': 'Number of Votes'
        },
        axis=1,
        inplace=True)

    st.markdown(
        """<p align="justify">A commercially successful movie not only provides entertainment to the audience but also enables film producers to generate significant profits. 
    Several factors such as veteran actors, social media presence, popularity, and release timing are important for profitability, 
    but they do not always guarantee that a movie will be well received by the audience. 
    On this page, we seek to understand temporal patterns affecting movie opening performance, 
    see how popular genres change over the years, view movie rankings based on chosen metrics, observe movie runtimes across different genres, and track changes in movie ratings and vote averages over time.</p>""",
        unsafe_allow_html=True)
    # st.write("See `apps/home.py` to know how to use it.")
    st.markdown(f"<h2> I. Temporal Pattern of Movie Openings",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section aims to analyze the months in which movies have the best opening performance. 
                        The analysis of temporal patterns across the years enables filmmakers to strategically release films in months when such movies are in demand.</p>""",
        unsafe_allow_html=True)

    df_movies['month'] = pd.DatetimeIndex(df_movies['release']).month
    opening_by_month_year = df_movies.groupby(["startYear", "month"
                                               ]).Opening.mean().reset_index()
    newdata = ColumnDataSource(opening_by_month_year)

    mapper = LinearColorMapper(palette=bokeh.palettes.RdBu[9],
                               low=opening_by_month_year["Opening"].min(),
                               high=opening_by_month_year["Opening"].max())

    hover = HoverTool(tooltips=[
        ("Opening", "@Opening{$,}"),
    ])

    TOOLS = [hover, "save,pan,box_zoom,reset,wheel_zoom"]

    p = figure(x_axis_label='Year',
               y_axis_label='Month',
               tools=TOOLS,
               plot_width=900)

    p.rect(x="startYear",
           y="month",
           width=1,
           height=1,
           source=newdata,
           fill_color={
               'field': 'Opening',
               'transform': mapper
           })

    color_bar = ColorBar(color_mapper=mapper,
                         location=(20, 0),
                         label_standoff=18,
                         ticker=AdaptiveTicker(),
                         formatter=NumeralTickFormatter(format="$,"))

    p.add_layout(color_bar, 'right')

    p.title.text = "Movie Opening Performance by Year and Month"
    p.title.align = "center"
    p.title.text_font_size = "20px"
    st.write(p)

    st.markdown(f"<h2> II. Movie Ranking Analysis", unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the rankings of movies per year based on the following criteria: 
    <strong>Budget, Opening, Gross, Worldwide Gross, Metacritic Score, Runtime (Minutes), Average Rating, and Number of Votes</strong>. This section enables
    analysts to identify the qualities and characteristics of movies that have appeared in these rankings.</p>""",
        unsafe_allow_html=True)
    categories = [
        'Budget', 'Opening', 'Gross', 'Worldwide Gross', 'Metacritic Score',
        'Runtime (Minutes)', 'Average Rating', 'Number of Votes'
    ]

    years = list(df_movies['startYear'].unique())

    option1 = st.selectbox('Please select the category', categories)

    option2 = st.selectbox('Please select the year', years)

    figure1 = movie_analyzer(df_movies, category=option1, year=option2)
    st.plotly_chart(figure1)
    # st.write('You selected:', option)

    st.markdown(f"<h2> III. What are the Most Popular Movie Genres?",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the most popular genres as a WordCloud. The larger the font, the more frequently
                            the word appears. From the WordCloud, we can observe that Action movies were the most popular genre among
                            filmmakers during the last 10 years.</p>""",
        unsafe_allow_html=True)
    # Join the different processed abstracts together.
    colors = ["#BF0A30", "#002868"]
    cmap = LinearSegmentedColormap.from_list("mycmap", colors)

    long_string = ' '.join(df_movies['genres'].values.tolist())

    # Create a WordCloud object
    wordcloud = WordCloud(background_color="white",
                          colormap=cmap,
                          width=1000,
                          height=300,
                          max_font_size=500,
                          relative_scaling=0.3,
                          min_font_size=5)

    # Generate a word cloud
    wordcloud = wordcloud.generate(long_string)

    # Visualize the word cloud (figure size roughly matches the 1000x300-pixel cloud)
    fig_cld, axes_cld = plt.subplots(1, 1, figsize=(10, 3))
    axes_cld.imshow(wordcloud, interpolation="bilinear")

    plt.axis("off")
    st.pyplot(fig_cld)

    st.markdown(f"<h2> IV. Movie Runtimes per Genre Analysis",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes movie runtimes per genre. It is important to identify the characteristics
                    of movies with unusual runtimes, as these may or may not affect the movie's viewership. It is also quite possible that
                    these films are experimental in nature and that the director mainly created the movie for test audiences.</p>""",
        unsafe_allow_html=True)
    genres = title_basics_df['genres'].unique().tolist()
    genres.append('All')

    option3 = st.slider('Please choose the number of movies to consider', 2, 20,
                        10)
    option4 = st.selectbox('Please select the genre', genres)

    figure2 = runtimemovie_analyzer(title_basics_df,
                                    number=option3,
                                    genre=option4)
    st.plotly_chart(figure2)

    st.markdown(f"<h2> V. Performance for each Genre Across the Years",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section aims to visualize the performance of each genre, based on different metrics (opening, gross, and worldwide gross), across the years 2011 to 2021.</p>
                      """,
        unsafe_allow_html=True)
    categories = ['Opening', 'Gross', 'Worldwide Gross']
    option5 = st.selectbox('Please select the category', categories)
    figure3 = genre_opening_analyzer(df_movies, category=option5)
    st.plotly_chart(figure3)

    st.markdown(f"<h2> VI. Average Budget per Genre", unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the average budget per genre across the available data. From the visualization, we
                        can observe that the Action genre has average budgets that stand out as outliers relative to the average budgets of the other genres.
                        Meanwhile, other genres such as horror, drama, documentary, and comedy are usually made with lower budget allocations.</p>
                      """,
        unsafe_allow_html=True)
    fig = plt.figure(figsize=(15, 10))

    # fliersize is the size of outlier markers
    g = sns.boxplot(x='genres',
                    y='budget',
                    data=imdb_info_withbudget,
                    palette="Set2",
                    linewidth=1,
                    fliersize=1.5)

    g.set(title='Average Budget per Genre',
          ylabel="Average Budget ($M)",
          xlabel="")

    # put a horizontal line on overall mean
    plt.axhline(imdb_info_withbudget.budget.mean(),
                ls='--',
                lw=1,
                color='black')

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    #fig.savefig("filename.png")
    st.pyplot(fig)
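
For reference, the opening-by-month aggregation near the top of `app()` (Section I) depends on the app-level `df_movies` DataFrame, which is not shown here. The same groupby pattern on a small hypothetical DataFrame looks like this:

import pandas as pd

# Hypothetical movie rows with a release date and an opening gross (assumption).
movies = pd.DataFrame({
    'startYear': [2019, 2019, 2020, 2020],
    'release': ['2019-05-03', '2019-05-24', '2020-07-17', '2020-12-25'],
    'Opening': [50_000_000, 30_000_000, 12_000_000, 40_000_000],
})

# Average opening per (year, month) cell, ready to feed the rect heatmap above.
movies['month'] = pd.DatetimeIndex(movies['release']).month
opening_by_month_year = movies.groupby(['startYear', 'month']).Opening.mean().reset_index()
print(opening_by_month_year)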