def zooming_ticker():
    """Build a composite ticker giving sensible tick values at every zoom level.

    :return: A bokeh composite ticker
    """
    # Fixed-width tickers for intervals 1 through 4 (minor tick count tracks
    # the interval), plus an open-ended ticker for intervals of 5 and above.
    fixed = [
        AdaptiveTicker(base=10, mantissas=list(range(1, 10)),
                       min_interval=n, max_interval=n, num_minor_ticks=n)
        for n in (1, 2, 3, 4)
    ]
    open_ended = AdaptiveTicker(base=10, mantissas=list(range(1, 10)),
                                min_interval=5, num_minor_ticks=5)
    return CompositeTicker(tickers=fixed + [open_ended])
def plot_amp_qa(data, name, lower=None, upper=None, amp_keys=None, title=None,
                plot_height=80, plot_width=700, ymin=None, ymax=None):
    '''Creates gridplot of 3 camera separated amp plots

    Args:
        data: table of per_amp qadata
        name: metric being plotted (str)
    Options:
        lower: list of lower thresholds per camera from get_thresholds()
            format: [[lower_errB, lowerB], [lower_errR, lowerR], [lower_errZ, lowerZ]]
        upper: list of upper thresholds per camera from get_thresholds()
            format: [[upperB, upper_errB], [upperR, upper_errR], [upperZ, upper_errZ]]
        amp_keys: list of amps that have data
        title: title for plot, if different than name (str)
        plot_height, plot_width: height, width of graph in pixels
        ymin/ymax: lists of y axis ranges for B, R, Z plots, unless data exceeds these
    Output:
        Bokeh gridplot object'''
    if title is None:  # fix: identity comparison, not == None
        title = name
    # Fix: ymin/ymax default to None but were previously indexed
    # unconditionally (ymin[0], ...), raising TypeError when omitted.
    if ymin is None:
        ymin = [None, None, None]
    if ymax is None:
        ymax = [None, None, None]

    labels = [(spec, amp)
              for spec in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
              for amp in ['A', 'B', 'C', 'D']]

    figs = []
    for i, cam in enumerate(['B', 'R', 'Z']):
        # The top (B) panel is taller to leave room for the shared title.
        extra = 25 if cam == 'B' else 0
        fig = plot_amp_cam_qa(data, name, cam, labels, title,
                              lower=lower, upper=upper, amp_keys=amp_keys,
                              plot_height=plot_height + extra,
                              plot_width=plot_width,
                              ymin=ymin[i], ymax=ymax[i])
        # Same ticker for every metric; BIAS gets scientific ("e") tick
        # labels, everything else abbreviated ("a") labels.
        fig.yaxis.ticker = AdaptiveTicker(base=10, desired_num_ticks=5,
                                          mantissas=np.arange(1, 5.5, 0.5),
                                          min_interval=1)
        fig.yaxis.formatter = NumeralTickFormatter(
            format='e' if name == "BIAS" else 'a')
        figs.append(fig)

    # x-axis labels for spectrograph 0-9 and amplifier A-D
    axis = bk.figure(x_range=FactorRange(*labels), toolbar_location=None,
                     plot_height=50, plot_width=plot_width,
                     y_axis_location=None)
    axis.line(x=labels, y=0, line_color=None)
    axis.grid.grid_line_color = None
    axis.outline_line_color = None

    fig = gridplot([[figs[0]], [figs[1]], [figs[2]], [axis]],
                   toolbar_location='right')
    return fig
def make_plot(source, bgvar):
    """Render the life-expectancy choropleth coloured by one block-group variable.

    source: bokeh data source holding patch coordinates plus the metric columns.
    bgvar:  name of the column used to colour the patches.
    Returns the configured bokeh figure.
    """
    key = str(bgvar)

    # Sequential multi-hue palette, reversed so the darkest shade maps to the
    # highest values.
    shades = bokeh.palettes.Plasma[7][::-1]
    mapper = LinearColorMapper(palette=shades)

    hover = HoverTool(tooltips=[
        ('Life Expectancy', '@outp_life'),
        ('Predicted Life Expectancy', '@outp_outp'),
        (feb[key], '@' + key),
    ])

    # Vertical colour bar keyed to the same mapper as the patches.
    color_bar = ColorBar(color_mapper=mapper, label_standoff=8, width=20,
                         height=500, border_line_color=None, location=(0, 0),
                         orientation='vertical', ticker=AdaptiveTicker())

    fig = figure(title='Life Expectancy', plot_height=600, plot_width=450,
                 toolbar_location=None, tools=[hover],
                 x_range=(-10060000, -10035000), y_range=(4658000, 4685000),
                 x_axis_type="mercator", y_axis_type="mercator")
    fig.add_tile(tile_provider)
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None

    # Patch renderer coloured by the selected variable.
    fig.patches('xs', 'ys', source=source,
                fill_color={'field': key, 'transform': mapper},
                line_color='black', line_width=0.25, fill_alpha=0.26)
    fig.add_layout(color_bar, 'right')
    return fig
def plot_fp_temp(data, source):
    """Draw the focal-plane device temperature map with a horizontal colour bar.

    data:   frame whose columns (except 'line_color') become hover fields.
    source: bokeh source providing obs_x/obs_y positions and temp_color values.
    Returns the configured bokeh figure.
    """
    hover_fields = [(name, '@' + name) for name in data.columns
                    if name not in ['line_color']]
    tooltips = [('cursor obsXY', '($x, $y)')] + hover_fields

    fig = figure(title='Focal Plane Temperature',
                 tools='pan,wheel_zoom,reset,hover,save',
                 tooltips=tooltips, aspect_scale=1,
                 plot_width=950, plot_height=1000)
    fig.xaxis.axis_label = 'obsX / mm'
    fig.yaxis.axis_label = 'obsY / mm'
    fig.hover.show_arrow = True

    # Fixed colour range (the mapper is not refreshed when new data arrive);
    # previously derived from the data min/max with a warning limit of 30.
    low, high = 15, 35
    mapper = LinearColorMapper(palette=linear_bmy_10_95_c78, low=low, high=high)

    fig.circle(x='obs_x', y='obs_y', source=source, radius=5,
               fill_color={'field': 'temp_color', 'transform': mapper},
               fill_alpha=0.7, line_color='line_color', line_width=1.8,
               hover_line_color='black')

    bar = ColorBar(color_mapper=mapper,
                   ticker=AdaptiveTicker(),
                   orientation='horizontal',
                   title='absolute device temperature / °C',
                   padding=5, location=(300, 0), height=15, width=250)
    fig.add_layout(bar, place='above')
    return fig
def create_figure1():
    """Build the pair-1 scatter figure with a profit colour bar.

    Reads the module-level widgets (x, y, size, color) and dataframes
    (dfp1, dfp2) and returns the configured bokeh figure.
    """
    xsp1 = dfp1[x.value].values
    ysp1 = dfp1[y.value].values
    x_titlep1 = x.value.title()
    y_titlep1 = y.value.title()

    kwp1 = dict()
    if x.value in discrete:
        kwp1['x_range'] = sorted(set(xsp1))
    if y.value in discrete:
        kwp1['y_range'] = sorted(set(ysp1))
    kwp1['title'] = "%s vs %s" % (x_titlep1, y_titlep1) + \
        " for {} on {} and {}".format(BotName, EURUSD, TimeFrame)

    pp1 = figure(plot_height=400, plot_width=800,
                 tools='pan,box_zoom,hover,reset,lasso_select', **kwp1)
    pp1.xaxis.axis_label = x_titlep1
    pp1.yaxis.axis_label = y_titlep1
    if x.value in discrete:
        # Fix: pandas removed the `pd.np` alias (deprecated 0.25, removed 2.0);
        # use numpy directly.
        pp1.xaxis.major_label_orientation = np.pi / 4

    # NOTE(review): sizes/colours are derived from dfp2 while the points come
    # from dfp1 — confirm this cross-frame pairing is intentional.
    sz = 9
    if size.value != 'None':
        if len(set(dfp2[size.value])) > N_SIZES:
            groups = pd.qcut(dfp2[size.value].values, N_SIZES,
                             duplicates='drop')
        else:
            groups = pd.Categorical(dfp2[size.value])
        sz = [SIZES[xx] for xx in groups.codes]

    c = "#31AADE"
    if color.value != 'None':
        if len(set(dfp2[color.value])) > N_COLORS:
            groups = pd.qcut(dfp2[color.value].values, N_COLORS,
                             duplicates='drop')
        else:
            groups = pd.Categorical(dfp2[color.value])
        c = [COLORS[xx] for xx in groups.codes]

    # COLOR BAR NEXT TO GRAPHIC - PAIR 1
    # Map colour to profit; fall back to a dummy 0..1 range when dfp1 has no
    # rows (min/max of an empty sequence raises ValueError).
    try:
        Var_color_mapper = LinearColorMapper(palette="Inferno256",
                                             low=min(dfp1['Profit']),
                                             high=max(dfp1['Profit']))
    except ValueError:
        Var_color_mapper = LinearColorMapper(palette="Inferno256",
                                             low=0, high=1)
        print('This {} did not launch Phase {} on {}'.format(
            BotName, Phase, TimeFrame))

    GraphTicker = AdaptiveTicker(base=50, desired_num_ticks=10,
                                 num_minor_ticks=20, max_interval=1000)
    Color_legend = ColorBar(color_mapper=Var_color_mapper, ticker=GraphTicker,
                            label_standoff=12, border_line_color=None,
                            location=(0, 0))

    pp1.circle(x=xsp1, y=ysp1, color=c, size=sz, line_color="white",
               alpha=0.6, hover_color='white', hover_alpha=0.5)
    pp1.add_layout(Color_legend, 'right')
    return pp1
class TimeTicker(CompositeTicker):
    """ Generate nice ticks across different time scales. """

    # Client-side (CoffeeScript) implementation backing this custom model.
    __implementation__ = 'time_ticker.coffee'

    num_minor_ticks = Override(default=4)
    # Delegate to one AdaptiveTicker per time regime; intervals are expressed
    # in the ONE_* duration constants defined elsewhere in this module.
    tickers = Override(default=lambda: [
        # Sub-second regime: decimal 1/2/5 steps from nanoseconds up to 500 ms.
        AdaptiveTicker(mantissas=[1, 2, 5],
                       base=10,
                       min_interval=ONE_NANO,
                       max_interval=500 * ONE_MILLI,
                       num_minor_ticks=5),
        # Seconds-to-minutes regime: base-60 steps up to 30-minute intervals.
        AdaptiveTicker(mantissas=[1, 2, 5, 10, 15, 20, 30],
                       base=60,
                       min_interval=ONE_SECOND,
                       max_interval=30 * ONE_MINUTE,
                       num_minor_ticks=4),
        # Hours and beyond: base-24 steps with no upper bound.
        AdaptiveTicker(mantissas=[1, 2, 4, 6, 8, 12],
                       base=24,
                       min_interval=ONE_HOUR,
                       max_interval=None,
                       num_minor_ticks=4)
    ])
def build_plot(self, permit_type: str, year: int):
    """Build a choropleth of permit counts per zip code for one year.

    permit_type: 'total' or one of self.permit_types.
    year:        a year present in self.years.
    Returns the configured bokeh figure.
    """
    assert (permit_type == 'total' or permit_type in self.permit_types)
    assert (year in self.years)

    info = self.build_data_source(year, permit_type)
    target = info['target_field']
    mapper = LinearColorMapper(palette=Palette,
                               low=info['min_count'],
                               high=info['max_count'])

    clean_title = f"{permit_type.replace('PERMIT - ', '').title()} - {year}"

    fig = figure(
        title=clean_title,
        tools='hover,zoom_in, zoom_out',
        x_axis_location=None,
        y_axis_location=None,
        tooltips=[("Zip Code", "@zip"), (clean_title, f"@{target}")],
    )
    # Bare map: no grid, no outline, no toolbar chrome.
    fig.grid.grid_line_color = None
    fig.outline_line_color = None
    fig.toolbar.logo = None
    fig.toolbar_location = None
    fig.hover.point_policy = "follow_mouse"

    fig.patches('xs', 'ys',
                source=info['column_data_source'],
                fill_color={'field': target, 'transform': mapper},
                fill_alpha=0.7,
                line_color="white",
                line_width=0.5)

    bar = ColorBar(color_mapper=mapper, ticker=AdaptiveTicker(),
                   border_line_color=None, location=(0, 0))
    fig.add_layout(bar, 'right')
    return fig
def plot_heatmap(col):
    '''Plot a focal-plane heatmap for one calibdf column.

    col is a column name in calibdf: one of R1R2_sum, residuals,
    GEAR_CALIB_T or GEAR_CALIB_P.

    Returns (figure, ColumnDataSource).'''
    # Display metadata per plottable column: (label, unit suffix, colour limits).
    meta = {
        'R1R2_sum': ('R1+R2', ' / mm', (5.5, 6.5)),
        'residuals': ('RMS residuals', ' / mm', (0, 0.02)),
        'GEAR_CALIB_T': ('Gear ratio θ', '', (0.8, 1.2)),
        'GEAR_CALIB_P': ('Gear ratio φ', '', (0.8, 1.2)),
    }

    data, calibdf = pcm.data, pcm.calibdf
    name, unit, lim = meta[col]

    tooltips = [('cursor obsXY', '($x, $y)')]
    tooltips += [(c, '@' + c) for c in filter_cols(calibdf.columns)]

    heatmap = figure(
        title=f'{name}, expid {data.expid}, {data.mode}',
        tools='pan,wheel_zoom,reset,hover,save',
        tooltips=tooltips,
        frame_width=400, frame_height=400,
        x_range=(-420, 420), y_range=(-420, 420))
    heatmap.xaxis.axis_label = 'obsX / mm'
    heatmap.yaxis.axis_label = 'obsY / mm'
    heatmap.hover.show_arrow = True

    # Fixed colour limits per quantity (not derived from the data).
    color_mapper = LinearColorMapper(palette=Magma256, low=lim[0], high=lim[1])

    heatmap_src = ColumnDataSource(calibdf)
    heatmap.circle(
        x='obs_x', y='obs_y', source=heatmap_src, radius=5,
        fill_color={'field': col, 'transform': color_mapper},
        fill_alpha=0.7, line_color='white', line_width=1.8,
        hover_line_color='black')

    colorbar = ColorBar(
        title=name + unit, color_mapper=color_mapper,
        ticker=AdaptiveTicker(), orientation='horizontal',
        padding=5, location=(0, 0), height=10, width=390)
    heatmap.add_layout(colorbar, place='above')
    return heatmap, heatmap_src
def create_daily_res_plot(res_forecast, load_forecast):
    """
    Graph the res injection forecast.

    Arguments:
        res_forecast (list): list of renewable energy injection forecast
        load_forecast (list): list of load forecast
    """
    # Half-hourly timestamps starting at midnight today, one per forecast value.
    now = datetime.datetime.today()
    midnight = datetime.datetime(year=now.year, month=now.month, day=now.day)
    time_of_day = [midnight + datetime.timedelta(minutes=30 * i)
                   for i in range(len(res_forecast))]

    # Split points into "peak" (at or above the 75th percentile) and "normal".
    threshold = np.percentile(res_forecast, 75)
    normal_dict = {'x': [], 'y': [], 'percentage': []}
    peak_dict = {'x': [], 'y': [], 'percentage': []}
    for when, res, load in zip(time_of_day, res_forecast, load_forecast):
        bucket = peak_dict if res >= threshold else normal_dict
        bucket['x'].append(when)
        bucket['y'].append(res)
        bucket['percentage'].append(percentage_of(res, load))

    # Hover tool to properly display time of day and value on hover
    hover = HoverTool(
        tooltips=[("Time of day", "@x{%H:%M}"),
                  ("Forecast Value", "@y MWh"),
                  ("Percentage of Daily Load", "@percentage{1.11} %")],
        formatters={'@x': 'datetime'},
    )

    plot = figure(
        x_axis_label="Time of Day",
        y_axis_label="Megawatts Per Hour",
        x_axis_type='datetime',
        sizing_mode="stretch_width",
        tools=[hover, BoxZoomTool(), ResetTool(), LassoSelectTool(),
               WheelZoomTool(), PanTool(), SaveTool()],
    )
    plot.xaxis.formatter = DatetimeTickFormatter(
        minutes=["%H:%M"],
        hours=["%H:%M"],
    )

    # Pad the axis ranges slightly beyond the data.
    plot.y_range = Range1d(min(res_forecast) - 200, max(res_forecast) + 100)
    plot.x_range = Range1d(time_of_day[0] - datetime.timedelta(minutes=5),
                           time_of_day[-1] + datetime.timedelta(minutes=5))

    plot.grid.minor_grid_line_color = '#eeeeee'

    # Fonts, label styling and tick density.
    plot.axis.axis_label_text_font = "raleway"
    plot.axis.axis_label_text_font_style = "normal"
    plot.axis.major_label_text_font = "raleway"
    plot.xaxis.ticker = DatetimeTicker(desired_num_ticks=24)
    plot.yaxis.ticker = AdaptiveTicker(desired_num_ticks=20)

    # Faint line through all points; small circles for normal values, large
    # ones for peak values.
    plot.line(time_of_day, res_forecast, line_alpha=0.2, color="#264b01",
              line_width=1.5)
    plot.circle('x', 'y', source=normal_dict, size=8, color="#264b01")
    plot.circle('x', 'y', source=peak_dict, size=15, color="#264b01")

    return components(plot)
def show_data_visualization(source):
    """Show the data visualization in a webpage."""
    # Set up the plot window
    # Another map tile option: Vendors.STAMEN_TERRAIN)
    tile_provider = get_provider(Vendors.CARTODBPOSITRON)
    # San Francisco bounding box in lat/long, converted to web-mercator.
    sf_lat = (37.73, 37.81)
    sf_long = (-122.47, -122.359720)
    sf_xrange = [long_to_merc(long) for long in sf_long]
    sf_yrange = [lat_to_merc(lat) for lat in sf_lat]
    plot_options = dict(plot_width=1000, plot_height=800,
                        title='Hourly Net Change in Bikes Docked')
    p = figure(x_range=sf_xrange, y_range=sf_yrange,
               x_axis_type="mercator", y_axis_type="mercator",
               tooltips=[("Net Change", "@net_change"), ("ID", "@id"),
                         ("Station", "@name")],
               **plot_options)
    p.add_tile(tile_provider)

    # Add a color bar (diverging palette, fixed -30..30 range, reversed so
    # positive net change maps to the warm end).
    palette = RdBu[11]
    palette.reverse()
    color_mapper = LinearColorMapper(palette=palette, low=-30, high=30)
    color_bar = ColorBar(color_mapper=color_mapper, ticker=AdaptiveTicker(),
                         label_standoff=12, border_line_color=None,
                         location=(0, 0))
    p.add_layout(color_bar, 'right')

    # Add the station points as circles
    p.circle(x='x', y='y', size=15,
             fill_color={'field': 'net_change', 'transform': color_mapper},
             fill_alpha=0.8,
             source=source,
             )

    # add two sliders: one for date, one for hour
    start_date, end_date = datetime.date(2019, 9, 1), datetime.date(2019, 9, 30)
    date_fmt = '%Y%m%d'
    # Out of simplicity, setting the dates to ints to make the slider work here
    date_slider = Slider(start=int(start_date.strftime(date_fmt)),
                         end=int(end_date.strftime(date_fmt)),
                         step=1,
                         value=int(start_date.strftime(date_fmt)),
                         title='Date')
    hour_slider = Slider(start=0, end=23, value=9, step=1, title="Hour of Day")

    # Both callbacks swap the displayed 'net_change' column for the one keyed
    # by "<date> <hour>".
    # NOTE(review): assumes source.data holds one column per "<date> <hour>"
    # key plus 'curr_hr'/'curr_date' bookkeeping columns — confirm against the
    # data-loading code.
    date_callback = CustomJS(args=dict(source=source), code="""
        var data = source.data;
        var curr_date = cb_obj.value;
        data['net_change'] = data[curr_date + ' ' + data['curr_hr'][0]];
        source.change.emit();
    """)
    # NOTE(review): the JS `pad` helper below is dead code — `padStart` is
    # used instead; kept as-is to avoid touching runtime strings.
    hour_callback = CustomJS(args=dict(source=source), code="""
        var data = source.data;
        function pad(n, width, z) {
            z = z || '0';
            n = n + '';
            return n.length >= width ? n : new Array(width - n.length + 1).join(z) + n;
        }
        var curr_hr = String(cb_obj.value).padStart(2, '0');
        data['curr_hr'][0] = curr_hr;
        data['net_change'] = data[data['curr_date'][0] + ' ' + curr_hr];
        source.change.emit();
    """)

    output_file("net_bikes.html")
    date_slider.js_on_change('value', date_callback)
    hour_slider.js_on_change('value', hour_callback)

    # Display on the page
    show(
        column(
            row(
                widgetbox(date_slider),
                widgetbox(hour_slider),
            ),
            p
        )
    )
def generate_heatmap(data, keywords):
    """Render a date-vs-time heatmap of point values plus a palette picker.

    data:     frame with pointtimestamp/pointname/pointvalue/date/time/units.
    keywords: request parameters; an optional 'color' entry selects a palette.
    Returns (script, plot_html) where plot_html is prefixed with the picker.
    """
    palettes = heatmap_colors
    selected = 'red-blue'
    data = data.sort_values(by='pointtimestamp')
    if 'color' in keywords and keywords['color'] in palettes:
        selected = keywords['color']

    # Colour range from the data; widen a degenerate (constant) range by ±1
    # so the mapper stays valid.
    lo = data['pointvalue'].min()
    hi = data['pointvalue'].max()
    if lo == hi:
        lo, hi = lo - 1, hi + 1
    mapper = LinearColorMapper(palette=palettes[selected], low=lo, high=hi)

    source = ColumnDataSource(data)

    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
    p = figure(title=data['pointname'][0],
               y_range=list(reversed(data['date'].unique())),
               x_range=list(data['time'].unique()),
               x_axis_location="above",
               plot_width=1000, plot_height=700,
               tools=TOOLS, toolbar_location='below',
               sizing_mode='scale_width')

    # Strip the default chart chrome down to the coloured cells.
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "5pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = 3.14 / 3
    p.xgrid.grid_line_color = None

    p.rect(x="time", y="date", width=1, height=.95, source=data,
           fill_color={'field': 'pointvalue', 'transform': mapper},
           line_color=None)

    p.select_one(HoverTool).tooltips = [
        ('date', '@date @time'),
        ('pointvalue', '@pointvalue ' + data['units'][0]),
    ]

    color_bar = ColorBar(color_mapper=mapper,
                         ticker=AdaptiveTicker(),
                         formatter=PrintfTickFormatter(
                             format="%d " + data['units'][0]),
                         label_standoff=15, location=(0, 0))
    p.add_layout(color_bar, 'right')

    script, plot = components(p)

    # Build the palette <select> with the active palette pre-selected.
    options = []
    for key in palettes:
        flag = 'selected' if key == selected else ''
        options.append("<option value='" + key + "' " + flag + ">" +
                       key.title() + "</option>")
    picker = ("Colors: <select class='color-picker' name='color'>" +
              "".join(options) + "</select>")
    plot = picker + plot
    return script, plot  # Embed figure in template
def get_time_charts(self, time_selector, suffix, width=600, height=350):
    """Build the list of time-window charts for the build metrics.

    time_selector: callable mapping a timestamp series to a boolean mask.
    suffix: string appended to every chart title (e.g. the window label).
    Returns a list of bokeh chart objects (possibly empty).
    """
    charts = []
    selector = time_selector(self.metrics['completion'])
    # Nothing in this time window: return no charts.
    if not any(selector):
        return charts

    # hourly throughput
    s1 = figure(width=width, height=height, x_axis_type='datetime',
                title='hourly throughput' + suffix)
    s1.legend.orientation = 'bottom_left'
    s1.circle(self.metrics[selector & self.completed]['completion'],
              self.metrics[selector & self.completed]['throughput'],
              color='blue', alpha=0.2, size=12,
              legend='hourly throughput')
    # Dashed horizontal marker at the peak throughput in the window.
    peak = Span(location=self.metrics[selector]['throughput'].max(),
                dimension='width',
                line_color='green', line_dash='dashed', line_width=3)
    s1.renderers.extend([peak])
    charts.append(s1)

    # upload size / pulp upload time
    s2 = figure(width=width, height=height,
                title='upload size vs pulp upload time' + suffix)
    s2.xaxis.axis_label = 'Time uploading to pulp'
    s2.yaxis.axis_label = 'upload size (Mb)'
    # Seconds rendered as hh:mm:ss; mantissas chosen for time-like steps.
    s2.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
    s2.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
    s2.square(self.metrics[selector]['plugin_pulp_push'],
              self.metrics[selector]['upload_size_mb'],
              color='orange', alpha=0.2, size=12)
    charts.append(s2)

    # concurrent builds
    s3 = figure(width=width, height=height,
                title='concurrent builds' + suffix,
                x_axis_type='datetime')
    which_c = time_selector(self.concurrent['timestamp'])
    s3.line(self.concurrent[which_c]['timestamp'],
            self.concurrent[which_c]['nbuilds'],
            line_color='green', line_join='bevel')
    charts.append(s3)

    # squash time vs concurrent builds
    merged = self.metrics[selector].merge(self.concurrent[which_c],
                                          left_on=['completion'],
                                          right_on=['timestamp'],
                                          sort=False)
    sc = BoxPlot(merged, values='plugin_squash', label='nbuilds',
                 width=width, height=height,
                 title='squash time vs (other) concurrent builds' + suffix)
    sc._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
    sc._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
    charts.append(sc)

    # upload_size_mb
    valid = ~np.isnan(self.metrics['upload_size_mb'])
    hsize = MyHistogram(self.metrics['upload_size_mb'][selector][valid],
                        bins=10,
                        title='Upload size' + suffix,
                        plot_width=width, plot_height=height)
    hsize.xaxis.axis_label = 'Mb'
    charts.append(hsize)

    # running time by plugin
    these_metrics = self.metrics[selector]
    for column, bins, title in [
            ('running', None, 'Total build time' + suffix),
            ('plugin_pull_base_image', 15,
             'Time pulling base image' + suffix),
            ('plugin_distgit_fetch_artefacts', None,
             'Time fetching sources' + suffix),
            ('docker_build', None, 'Time in docker build' + suffix),
            ('plugin_squash', None, 'Time squashing layers' + suffix),
            ('plugin_pulp_push', None, 'Time uploading to pulp' + suffix),
    ]:
        values = these_metrics[column][~np.isnan(these_metrics[column])]
        h = MyHistogram(values, title=title, x_axis_type='datetime',
                        bins=bins or 10,
                        plot_width=width, plot_height=height)
        h.xaxis.formatter = NumeralTickFormatter(format="00:00:00")
        h.xaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        h.yaxis.bounds = (0, len(these_metrics))
        charts.append(h)

    # Now show plugin-level timings for a specific image
    # data looks like:
    # completion    image    plugin_x  plugin_y
    # 2016-03-18    image/name  205       60
    #
    # reshape to:
    # imgae       plugin    value
    # image/name  plugin_x  205
    # image/name  plugin_y  60
    if self.image:
        is_image = self.metrics[selector]['image'] == self.image
        image = self.metrics[selector][is_image]
        timings = pd.melt(image[[
            'image', 'running', 'plugin_pull_base_image',
            'plugin_distgit_fetch_artefacts', 'docker_build',
            'plugin_squash', 'plugin_compress', 'plugin_pulp_push'
        ]], id_vars=['image'], var_name='plugin')
        im = BoxPlot(timings, values='value', label='plugin',
                     width=width, height=height * 2,
                     title='%s timings%s' % (self.image, suffix))
        im._yaxis.formatter = NumeralTickFormatter(format="00:00:00")
        im._yaxis.ticker = AdaptiveTicker(mantissas=[1, 3, 6])
        charts.append(im)

    return charts
def simulate(self, agents: List[Agent],
             baseline_accuracy: float = None,
             init_train_data_portion: float = 0.1,
             pm_test_sets: list = None,
             accuracy_plot_wait_s=2E5,
             train_size: int = None, test_size: int = None,
             ):
    """
    Run a simulation.

    :param agents: The agents that will interact with the data.
    :param baseline_accuracy: The baseline accuracy of the model.
        Usually the accuracy on a hidden test set when the model is trained with all data.
    :param init_train_data_portion: The portion of the data to initially use for training. Must be [0,1].
    :param pm_test_sets: The test sets for the prediction market incentive mechanism.
    :param accuracy_plot_wait_s: The amount of time to wait in seconds between plotting the accuracy.
    :param train_size: The amount of training data to use.
    :param test_size: The amount of test data to use.
    """

    assert 0 <= init_train_data_portion <= 1

    # Data to save.
    save_data = dict(agents=[asdict(a) for a in agents],
                     baselineAccuracy=baseline_accuracy,
                     initTrainDataPortion=init_train_data_portion,
                     accuracies=[],
                     balances=[],
                     )
    time_for_filenames = int(time.time())
    save_path = f'saved_runs/{time_for_filenames}.json'
    plot_save_path = f'saved_runs/{time_for_filenames}_plot.png'
    self._logger.info("Saving run info to \"%s\".", save_path)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # Set up plots.
    doc: Document = curdoc()
    doc.title = "DeCAI Simulation"

    plot = figure(title="Balances & Accuracy on Hidden Test Set",
                  )
    plot.width = 800
    plot.height = 600

    plot.xaxis.axis_label = "Time (days)"
    plot.yaxis.axis_label = "Percent"
    plot.title.text_font_size = '20pt'
    plot.xaxis.major_label_text_font_size = '20pt'
    plot.xaxis.axis_label_text_font_size = '20pt'
    plot.yaxis.major_label_text_font_size = '20pt'
    plot.yaxis.axis_label_text_font_size = '20pt'

    # Simulation time is in seconds: major ticks every 5 days, grid every day.
    plot.xaxis[0].ticker = AdaptiveTicker(base=5 * 24 * 60 * 60)
    plot.xgrid[0].ticker = AdaptiveTicker(base=24 * 60 * 60)

    # One streaming data source (and line) per agent; colour/dash encodes
    # the agent kind (model-caller / good contributor / bad contributor).
    balance_plot_sources_per_agent = dict()
    good_colors = cycle([
        colors.named.green,
        colors.named.lawngreen,
        colors.named.darkgreen,
        colors.named.limegreen,
    ])
    bad_colors = cycle([
        colors.named.red,
        colors.named.darkred,
    ])
    for agent in agents:
        source = ColumnDataSource(dict(t=[], b=[]))
        assert agent.address not in balance_plot_sources_per_agent
        balance_plot_sources_per_agent[agent.address] = source
        if agent.calls_model:
            color = 'blue'
            line_dash = 'dashdot'
        elif agent.good:
            color = next(good_colors)
            line_dash = 'dotted'
        else:
            color = next(bad_colors)
            line_dash = 'dashed'
        plot.line(x='t', y='b',
                  line_dash=line_dash,
                  line_width=2,
                  source=source,
                  color=color,
                  legend=f"{agent.address} Balance")

    plot.legend.location = 'top_left'
    plot.legend.label_text_font_size = '12pt'

    # JavaScript code.
    # Render x tick values (seconds) as whole days.
    plot.xaxis[0].formatter = FuncTickFormatter(code="""
    return (tick / 86400).toFixed(0);
    """)
    plot.yaxis[0].formatter = PrintfTickFormatter(format="%0.1f%%")

    acc_source = ColumnDataSource(dict(t=[], a=[]))
    if baseline_accuracy is not None:
        # Horizontal reference line at the baseline accuracy.
        plot.ray(x=[0], y=[baseline_accuracy * 100], length=0, angle=0,
                 line_width=2,
                 legend=f"Accuracy when trained with all data: {baseline_accuracy * 100:0.1f}%")
    plot.line(x='t', y='a',
              line_dash='solid',
              line_width=2,
              source=acc_source,
              color='black',
              legend="Current Accuracy")

    # Callbacks scheduled onto the bokeh document to stream plot points from
    # the background thread; they also record the values for saving.
    @gen.coroutine
    def plot_cb(agent: Agent, t, b):
        source = balance_plot_sources_per_agent[agent.address]
        source.stream(dict(t=[t], b=[b * 100 / agent.start_balance]))
        save_data['balances'].append(dict(t=t, a=agent.address, b=b))

    @gen.coroutine
    def plot_accuracy_cb(t, a):
        acc_source.stream(dict(t=[t], a=[a * 100]))
        save_data['accuracies'].append(dict(t=t, accuracy=a))

    # Prediction-market runs defer evaluation/refunds to the market phase.
    continuous_evaluation = not isinstance(self._decai.im, PredictionMarket)

    def task():
        # Background worker: trains the initial model, replays agent
        # activity through a priority queue ordered by simulated time.
        (x_train, y_train), (x_test, y_test) = \
            self._data_loader.load_data(train_size=train_size, test_size=test_size)
        init_idx = int(len(x_train) * init_train_data_portion)
        self._logger.info("Initializing model with %d out of %d samples.",
                          init_idx, len(x_train))
        x_init_data, y_init_data = x_train[:init_idx], y_train[:init_idx]
        x_remaining, y_remaining = x_train[init_idx:], y_train[init_idx:]

        self._decai.model.init_model(x_init_data, y_init_data)
        if self._logger.isEnabledFor(logging.DEBUG):
            s = self._decai.model.evaluate(x_init_data, y_init_data)
            self._logger.debug("Initial training data evaluation: %s", s)
            s = self._decai.model.evaluate(x_remaining, y_remaining)
            self._logger.debug("Remaining training data evaluation: %s", s)

        self._logger.info("Evaluating initial model.")
        accuracy = self._decai.model.evaluate(x_test, y_test)
        self._logger.info("Initial test set accuracy: %0.2f%%", accuracy * 100)
        t = self._time()
        doc.add_next_tick_callback(
            partial(plot_accuracy_cb, t=t, a=accuracy))

        # Queue of (next-action-time, agent), processed in time order.
        q = PriorityQueue()
        random.shuffle(agents)
        for agent in agents:
            self._balances.initialize(agent.address, agent.start_balance)
            q.put((self._time() + agent.get_next_wait_s(), agent))
            doc.add_next_tick_callback(
                partial(plot_cb, agent=agent, t=t, b=agent.start_balance))

        unclaimed_data = []
        next_data_index = 0
        next_accuracy_plot_time = 1E4
        desc = "Processing agent requests"
        with tqdm(desc=desc,
                  unit_scale=True, mininterval=2, unit=" requests",
                  total=len(x_remaining),
                  ) as pbar:
            while not q.empty():
                # For now assume sending a transaction (editing) is free (no gas)
                # since it should be relatively cheaper than the deposit required to add data.
                # It may not be cheaper than calling `report`.

                if next_data_index >= len(x_remaining):
                    if not continuous_evaluation or len(unclaimed_data) == 0:
                        break

                current_time, agent = q.get()
                update_balance_plot = False
                if current_time > next_accuracy_plot_time:
                    # Periodically evaluate, plot and checkpoint to disk.
                    self._logger.debug("Evaluating.")
                    next_accuracy_plot_time += accuracy_plot_wait_s
                    accuracy = self._decai.model.evaluate(x_test, y_test)
                    doc.add_next_tick_callback(
                        partial(plot_accuracy_cb, t=current_time, a=accuracy))

                    if continuous_evaluation:
                        self._logger.debug("Unclaimed data: %d", len(unclaimed_data))
                        pbar.set_description(f"{desc} ({len(unclaimed_data)} unclaimed)")

                    with open(save_path, 'w') as f:
                        json.dump(save_data, f, separators=(',', ':'))

                    if os.path.exists(plot_save_path):
                        os.remove(plot_save_path)
                    export_png(plot, plot_save_path)

                self._time.set_time(current_time)

                balance = self._balances[agent.address]
                if balance > 0 and next_data_index < len(x_remaining):
                    # Pick data.
                    x, y = x_remaining[next_data_index], y_remaining[next_data_index]

                    if agent.calls_model:
                        # Only call the model if it's good.
                        if random.random() < accuracy:
                            update_balance_plot = True
                            self._decai.predict(Msg(agent.address, agent.pay_to_call), x)
                    else:
                        # Contributors: bad agents flip the label; anyone can
                        # make a mistake with probability prob_mistake.
                        if not agent.good:
                            y = 1 - y
                        if agent.prob_mistake > 0 and random.random() < agent.prob_mistake:
                            y = 1 - y

                        # Bad agents always contribute.
                        # Good agents will only work if the model is doing well.
                        # Add a bit of chance they will contribute since 0.85 accuracy is okay.
                        if not agent.good or random.random() < accuracy + 0.15:
                            value = agent.get_next_deposit()
                            if value > balance:
                                value = balance
                            msg = Msg(agent.address, value)
                            try:
                                self._decai.add_data(msg, x, y)
                                # Don't need to plot every time. Plot less as we get more data.
                                update_balance_plot = next_data_index / len(x_remaining) + 0.1 < random.random()
                                balance = self._balances[agent.address]
                                if continuous_evaluation:
                                    unclaimed_data.append((current_time, agent, x, y))
                                next_data_index += 1
                                pbar.update()
                            except RejectException:
                                # Probably failed because they didn't pay enough which is okay.
                                # Or if not enough time has passed since data was attempted to be added
                                # which is okay too because a real contract would reject this
                                # because the smallest unit of time we can use is 1s.
                                if self._logger.isEnabledFor(logging.DEBUG):
                                    self._logger.exception("Error adding data.")

                if balance > 0:
                    # Re-queue the agent for its next action.
                    q.put((current_time + agent.get_next_wait_s(), agent))

                # Try to settle (report/refund) deposits whose waiting
                # period has elapsed; entries are time-ordered so we can
                # stop at the first one that is still too recent.
                claimed_indices = []
                for i in range(len(unclaimed_data)):
                    added_time, adding_agent, x, classification = unclaimed_data[i]
                    if current_time - added_time < self._decai.im.refund_time_s:
                        break
                    if next_data_index >= len(x_remaining) \
                            and current_time - added_time < self._decai.im.any_address_claim_wait_time_s:
                        break
                    balance = self._balances[agent.address]
                    msg = Msg(agent.address, balance)

                    if current_time - added_time > self._decai.im.any_address_claim_wait_time_s:
                        # Attempt to take the entire deposit.
                        try:
                            self._decai.report(msg, x, classification, added_time, adding_agent.address)
                            update_balance_plot = True
                        except RejectException:
                            if self._logger.isEnabledFor(logging.DEBUG):
                                self._logger.exception("Error taking reward.")
                    elif adding_agent.address == agent.address:
                        # The agent reclaims its own deposit.
                        try:
                            self._decai.refund(msg, x, classification, added_time)
                            update_balance_plot = True
                        except RejectException:
                            if self._logger.isEnabledFor(logging.DEBUG):
                                self._logger.exception("Error getting refund.")
                    else:
                        # Report someone else's contribution for a reward.
                        try:
                            self._decai.report(msg, x, classification, added_time, adding_agent.address)
                            update_balance_plot = True
                        except RejectException:
                            if self._logger.isEnabledFor(logging.DEBUG):
                                self._logger.exception("Error taking reward.")

                    stored_data = self._decai.data_handler.get_data(x, classification,
                                                                    added_time, adding_agent.address)
                    if stored_data.claimable_amount <= 0:
                        claimed_indices.append(i)
                # Pop in reverse so earlier indices stay valid.
                for i in claimed_indices[::-1]:
                    unclaimed_data.pop(i)

                if update_balance_plot:
                    balance = self._balances[agent.address]
                    doc.add_next_tick_callback(
                        partial(plot_cb, agent=agent, t=current_time, b=balance))

        self._logger.info("Done going through data.")
        if continuous_evaluation:
            pbar.set_description(f"{desc} ({len(unclaimed_data)} unclaimed)")

        if isinstance(self._decai.im, PredictionMarket):
            # Prediction-market settlement: close the market, verify test
            # sets, then process contributions over the bounty rounds.
            self._time.add_time(agents[0].get_next_wait_s())
            self._decai.im.end_market()
            for i, test_set_portion in enumerate(pm_test_sets):
                if i != self._decai.im.test_reveal_index:
                    self._decai.im.verify_next_test_set(test_set_portion)
            with tqdm(desc="Processing contributions",
                      unit_scale=True, mininterval=2, unit=" contributions",
                      total=self._decai.im.get_num_contributions_in_market(),
                      ) as pbar:
                finished_first_round_of_rewards = False
                while self._decai.im.remaining_bounty_rounds > 0:
                    self._time.add_time(agents[0].get_next_wait_s())
                    self._decai.im.process_contribution()
                    pbar.update()

                    if not finished_first_round_of_rewards:
                        accuracy = self._decai.im.prev_acc
                        # If we plot too often then we end up with a blob instead of a line.
                        if random.random() < 0.1:
                            doc.add_next_tick_callback(
                                partial(plot_accuracy_cb, t=self._time(), a=accuracy))

                    if self._decai.im.state == MarketPhase.REWARD_RESTART:
                        finished_first_round_of_rewards = True
                        if self._decai.im.reset_model_during_reward_phase:
                            # Update the accuracy after resetting all data.
                            accuracy = self._decai.im.prev_acc
                        else:
                            # Use the accuracy after training with all data.
                            pass
                        doc.add_next_tick_callback(
                            partial(plot_accuracy_cb, t=self._time(), a=accuracy))
                        pbar.total += self._decai.im.get_num_contributions_in_market()

            # Plot final balances including market winnings.
            self._time.add_time(self._time() * 0.001)
            for agent in agents:
                balance = self._balances[agent.address]
                market_bal = self._decai.im._market_balances[agent.address]
                self._logger.debug("\"%s\" market balance: %0.2f Balance: %0.2f",
                                   agent.address, market_bal, balance)
                doc.add_next_tick_callback(
                    partial(plot_cb, agent=agent, t=self._time(),
                            b=max(balance + market_bal, 0)))

            # Refund each agent's first found submission and log the outcome.
            self._time.add_time(self._time() * 0.02)
            for agent in agents:
                msg = Msg(agent.address, 0)
                # Find data submitted by them.
                data = None
                for key, stored_data in self._decai.data_handler:
                    if stored_data.sender == agent.address:
                        data = key[0]
                        break
                if data is not None:
                    self._decai.refund(msg, np.array(data), stored_data.classification,
                                       stored_data.time)
                    balance = self._balances[agent.address]
                    doc.add_next_tick_callback(
                        partial(plot_cb, agent=agent, t=self._time(), b=balance))
                    self._logger.info("Balance for \"%s\": %.2f (%+.2f%%)",
                                      agent.address, balance,
                                      (balance - agent.start_balance) / agent.start_balance * 100)
                else:
                    self._logger.warning("No data submitted by \"%s\" was found."
                                         "\nWill not update it's balance.", agent.address)

            self._logger.info("Done issuing rewards.")

        # Final evaluation and persistence of the run artefacts.
        accuracy = self._decai.model.evaluate(x_test, y_test)
        doc.add_next_tick_callback(
            partial(plot_accuracy_cb, t=current_time, a=accuracy))

        with open(save_path, 'w') as f:
            json.dump(save_data, f, separators=(',', ':'))
        if os.path.exists(plot_save_path):
            os.remove(plot_save_path)
        export_png(plot, plot_save_path)

    doc.add_root(plot)
    # Run the simulation off the bokeh event loop; plot updates are pushed
    # back via doc.add_next_tick_callback.
    thread = Thread(target=task)
    thread.start()
def create_figure5():
    """Build the scatter figure for the Phase-5 pair plot.

    Reads the module-level select widgets (``x``, ``y``, ``size``, ``color``)
    and the DataFrame ``dfp5``; returns a Bokeh figure with a profit color
    bar attached on the right.
    """
    xsp5 = dfp5[x.value].values
    ysp5 = dfp5[y.value].values
    x_titlep5 = x.value.title()
    y_titlep5 = y.value.title()

    kwp5 = dict()
    # Categorical axes need an explicit factor range.
    if x.value in discrete:
        kwp5['x_range'] = sorted(set(xsp5))
    if y.value in discrete:
        kwp5['y_range'] = sorted(set(ysp5))
    kwp5['title'] = "%s vs %s" % (
        x_titlep5, y_titlep5) + " for {} on {} and {}".format(
            BotName, Par, H4)

    pp5 = figure(plot_height=500,
                 plot_width=800,
                 tools='pan,box_zoom,hover,reset,lasso_select',
                 **kwp5)
    pp5.xaxis.axis_label = x_titlep5
    pp5.yaxis.axis_label = y_titlep5
    if x.value in discrete:
        # FIX: the pandas `pd.np` alias is deprecated/removed; use numpy.
        pp5.xaxis.major_label_orientation = np.pi / 4

    # Marker size: fixed, or binned from the selected column.
    sz = 9
    if size.value != 'None':
        if len(set(dfp5[size.value])) > N_SIZES:
            groups = pd.qcut(dfp5[size.value].values, N_SIZES,
                             duplicates='drop')
        else:
            groups = pd.Categorical(dfp5[size.value])
        sz = [SIZES[xx] for xx in groups.codes]

    # Marker color: fixed, or binned from the selected column.
    c = "#31AADE"
    if color.value != 'None':
        if len(set(dfp5[color.value])) > N_COLORS:
            groups = pd.qcut(dfp5[color.value].values, N_COLORS,
                             duplicates='drop')
        else:
            groups = pd.Categorical(dfp5[color.value])
        c = [COLORS[xx] for xx in groups.codes]

    # Color bar next to the plot, keyed to profit.
    Var_color_mapper = LinearColorMapper(
        palette="Inferno256", low=min(dfp5['Profit']), high=max(dfp5['Profit'])
    )  # TODO: fix low/high so they track the selected color column
    # Var_color_mapper = LinearColorMapper(palette="Inferno256",low=min(dfp1[color.value]),high=max(dfp1[color.value]))
    GraphTicker = AdaptiveTicker(base=50,
                                 desired_num_ticks=10,
                                 num_minor_ticks=20,
                                 max_interval=1000)
    Color_legend = ColorBar(color_mapper=Var_color_mapper,
                            ticker=GraphTicker,
                            label_standoff=12,
                            border_line_color=None,
                            location=(0, 0))
    pp5.circle(x=xsp5,
               y=ysp5,
               color=c,
               size=sz,
               line_color="white",
               alpha=0.6,
               hover_color='white',
               hover_alpha=0.5)
    pp5.add_layout(Color_legend, 'right')
    return pp5
def city_stats(request, city):
    """View that renders graphs for case counts in a specific city.

    ``city`` arrives as a URL slug (dashes for spaces); it is converted back
    to the title-cased name used as a column key in the COVID dataset.
    Embeds four Bokeh graphs (script/div component pairs) into
    ``app/city_stats.html``: daily positives (hbar), 14-day and all-time
    cumulative lines, and the city's share of countywide new cases (vbar).
    """
    # Slug -> display/column name.
    city = city.replace('-', ' ').title()
    # .title() yields 'Coto De Caza' but the dataset column is 'Coto de Caza'.
    if city.lower() == 'coto de caza':
        city = 'Coto de Caza'
    all_cities, data_item, dataset = _get_covid_info()
    date_series = dataset['DateSpecCollect']
    case_count_series = dataset[city]
    total_series = dataset['Total']  # NOTE(review): assigned but unused below
    # Create a bokeh graph for positive cases by day over the past 2 months.
    days_back = 60
    counts_by_day_plot = figure(
        title=f'Positive cases by day over the last 60 days -- {case_count_series[days_back * -1:].sum()} total cases.',
        y_range=date_series[days_back * -1:],
        plot_width=700,
        plot_height=days_back * 20,  # 20 px per day keeps the bars readable
        tools="save",
        x_axis_label="Positive Cases Reported",
        x_axis_location='above',
        x_minor_ticks=2)
    counts_by_day_plot.hbar(y=date_series,
                            right=case_count_series,
                            left=0,
                            height=0.4,
                            color=RGB(79, 70, 229),
                            fill_alpha=0.5,
                            line_cap='round',
                            hatch_alpha=0.0)
    hbar_script, hbar_div = components(counts_by_day_plot)
    # Create a bokeh line graph plotting total case counts over the past 2 weeks.
    total_past_2_weeks_plot = figure(
        x_range=date_series[-14:],
        plot_width=500,
        plot_height=300,
        y_axis_label="Total case counts",
        tools="save",
        title=f'Total case counts over the last 14 days -- {case_count_series[-14:].sum()} new cases.')
    total_past_2_weeks_plot.xaxis.major_label_orientation = 45
    total_past_2_weeks_plot.line(x=date_series, y=case_count_series.cumsum())
    recent_line_script, recent_line_div = components(total_past_2_weeks_plot)
    # Create bokeh line graph showing total case counts since the start of pandemic.
    total_all_time_plot = figure(
        x_range=date_series,
        plot_width=500,
        plot_height=300,
        y_axis_label="Total case counts",
        tools="save",
        title=f'Total case counts since beginning of pandemic.')
    # Too many dates to label individually; hide the axis and thin the ticks.
    total_all_time_plot.xaxis.visible = False
    total_all_time_plot.xaxis.ticker = AdaptiveTicker(desired_num_ticks=10)
    total_all_time_plot.line(x=date_series, y=case_count_series.cumsum())
    all_line_script, all_line_div = components(total_all_time_plot)
    # Create a bokeh bar graph showing the percentage of total case by day.
    days_back = 14  # NOTE(review): reassigned, but the slices below hard-code -14
    percentage_series = (dataset[city]/dataset['Total']) * 100
    percentage_total_plot = figure(
        title=f'Percentage of new cases reported in OC attributed to {city}.',
        x_range=date_series[-14:],
        plot_width=500,
        plot_height=500,
        y_axis_label='% of new cases reported in the county',
        x_axis_label=f'* {city} is an estimated {round(population[city] / total_population * 100, 2)}% of total OC population.',
        tools='save')
    percentage_total_plot.vbar(x=date_series[-14:],
                               top=percentage_series[-14:],
                               bottom=0,
                               width=0.4,
                               color=RGB(79, 70, 229),
                               fill_alpha=0.5,
                               line_cap='round',
                               hatch_alpha=0.0)
    percentage_total_plot.xaxis.major_label_orientation = 45
    percentage_total_script, percentage_total_div = components(
        percentage_total_plot)
    # Everything the template needs, including how stale the source data is.
    context = {'city': city,
               'hbar_script': hbar_script,
               'hbar_div': hbar_div,
               'recent_line_script': recent_line_script,
               'recent_line_div': recent_line_div,
               'all_line_script': all_line_script,
               'all_line_div': all_line_div,
               'percentage_total_script': percentage_total_script,
               'percentage_total_div': percentage_total_div,
               'total_cases': case_count_series.sum(),
               'all_cities': all_cities,
               'last_updated': datetime.utcnow() - datetime.utcfromtimestamp(
                   data_item.modified / 1000)}
    return render(request, 'app/city_stats.html', context)
def block_heatmap(df, height=600, width=900):
    """Render a DataFrame as a clustered block heatmap.

    :param df: The Pandas DataFrame to render in block-heatmap style; values
        are mapped onto a fixed nine-color palette over [0, 1].
    :param height: Figure height in pixels.
    :param width: Figure width in pixels.
    :return: A Bokeh block heatmap figure modeled after example code.
    """
    # this colormap blatantly copied from the New York Times.
    colors = [
        "#ADD8E6", "#9AC7E7", "#88B6E9", "#76A5EB", "#6495ED",
        "#647CD8", "#6564C3", "#654BAE", "#663399"
    ]
    mapper = LinearColorMapper(palette=colors, low=0, high=1)

    # Integer positions for each column/row; the tick formatters below map
    # the numeric ticks back onto these labels.
    cols = {i: c for (i, c) in enumerate(df.columns)}
    index = {i: r for (i, r) in enumerate(df.index)}

    # One record per (column, row) cell: x/y position, labels and value.
    cols_by_rows = product(enumerate(df.columns), enumerate(df.index))
    data = np.array([[x, y, c, r, df.loc[r, c]]
                     for ((x, c), (y, r)) in cols_by_rows])
    combination_df = pd.DataFrame(
        data, columns=["gene_id", "sample_id", "gene", "sample", "value"])
    source = ColumnDataSource(combination_df)

    fig = figure(title="Clustered Heatmap",
                 toolbar_location="below",
                 x_range=(0, len(df.columns)),
                 y_range=(0, len(df.index)),
                 tools=["box_zoom", "pan", "reset", "save"],
                 name="heatmap",
                 x_axis_location="above",
                 plot_width=width,
                 plot_height=height,
                 active_drag="box_zoom")
    # FIX: name the glyph renderer so HoverTool(names=["heatmap"]) attaches
    # to it — HoverTool.names filters *renderers*; previously only the figure
    # carried the name, so the hover tool matched nothing.
    fig.rect(x="gene_id",
             y="sample_id",
             source=source,
             width=1,
             height=1,
             name="heatmap",
             fill_color={
                 'field': 'value',
                 'transform': mapper
             },
             line_color=None)

    fig.grid.grid_line_color = None
    fig.axis.axis_line_color = None
    fig.axis.major_tick_line_color = None
    fig.axis.major_label_text_font_size = "7pt"
    fig.axis.major_label_standoff = 0
    fig.xaxis.major_label_orientation = np.pi / 3

    # Translate numeric tick positions back to row/column labels in JS.
    fig.yaxis.formatter = FuncTickFormatter(code="""
    var labels = %s;
    return labels[tick] || '';
    """ % index)
    fig.xaxis.formatter = FuncTickFormatter(code="""
    var labels = %s;
    return labels[tick] || '';
    """ % cols)
    fig.yaxis.ticker = FixedTicker(ticks=list(index.keys()))
    # FIX: mantissas must lie in [1, base); 0 is not a valid mantissa
    # (cf. the range(1, 10) tickers built in zooming_ticker()).
    fig.xaxis.ticker = AdaptiveTicker(mantissas=list(range(1, 10)),
                                      min_interval=1,
                                      max_interval=5)

    hover = HoverTool(names=["heatmap"])
    hover.tooltips = [('gene', '@gene'),
                      ('sample', '@sample'),
                      ('percentile', '@value%')]
    fig.add_tools(hover)
    return fig
def create_figure():
    """Build the interactive scatter figure for the unfiltered Phase-1 data.

    Reads the module-level select widgets (``x``, ``y``, ``size``, ``color``)
    and the DataFrame ``df``; returns a Bokeh figure with a profit color bar
    attached on the right.
    """
    print(
        'this is Create Figure on Bokeh Interactive for a single chart on unfiltered Phase 1'
    )
    xs = df[x.value].values
    ys = df[y.value].values
    x_title = x.value.title()
    y_title = y.value.title()

    kw = dict()
    # Categorical axes need an explicit factor range.
    if x.value in discrete:
        kw['x_range'] = sorted(set(xs))
    if y.value in discrete:
        kw['y_range'] = sorted(set(ys))
    kw['title'] = "%s vs %s" % (
        x_title, y_title) + " for {} on {} and {}".format(BotName, i, j)

    p = figure(plot_height=900,
               plot_width=1700,
               tools='pan,box_zoom,hover,reset,lasso_select',
               **kw)
    p.xaxis.axis_label = x_title
    p.yaxis.axis_label = y_title
    if x.value in discrete:
        # FIX: the pandas `pd.np` alias is deprecated/removed; use numpy.
        p.xaxis.major_label_orientation = np.pi / 4

    # Marker size: fixed, or binned from the selected column.
    sz = 9
    if size.value != 'None':
        if len(set(df[size.value])) > N_SIZES:
            groups = pd.qcut(df[size.value].values, N_SIZES,
                             duplicates='drop')
        else:
            groups = pd.Categorical(df[size.value])
        sz = [SIZES[xx] for xx in groups.codes]

    # Marker color: fixed, or binned from the selected column.
    c = "#31AADE"
    if color.value != 'None':
        if len(set(df[color.value])) > N_COLORS:
            groups = pd.qcut(df[color.value].values, N_COLORS,
                             duplicates='drop')
        else:
            groups = pd.Categorical(df[color.value])
        c = [COLORS[xx] for xx in groups.codes]

    Var_color_mapper = LinearColorMapper(
        palette="Inferno256", low=min(df['Profit']), high=max(df['Profit'])
    )  # TODO: fix low/high so they track the selected color column
    # Var_color_mapper = LinearColorMapper(palette="Inferno256",low=min(df[color.value]),high=max(df[color.value]))
    GraphTicker = AdaptiveTicker(base=50,
                                 desired_num_ticks=10,
                                 num_minor_ticks=20,
                                 max_interval=1000)
    Color_legend = ColorBar(
        color_mapper=Var_color_mapper,
        ticker=GraphTicker,
        label_standoff=12,
        border_line_color=None,
        location=(0, 0)
    )  # TODO: use a LogTicker so the ticks match the color scale
    # FIX: the circle glyph was added twice, creating two identical renderers
    # (every point drawn twice, doubling the effective alpha); draw it once.
    p.circle(x=xs,
             y=ys,
             color=c,
             size=sz,
             line_color="white",
             alpha=0.1,
             hover_color='white',
             hover_alpha=0.1)
    p.add_layout(Color_legend, 'right')
    return p
def app():
    """Streamlit page: analysis and dashboards for movies from 2011 to 2021.

    Reads the module-level DataFrames (``df_movies_orig``,
    ``title_basics_df_orig``, ``imdb_info_withbudget_orig``) and renders six
    sections: opening-performance heatmap, per-year rankings, genre word
    cloud, runtime analysis, per-genre performance, and budget box plots.
    """
    st.title('Analysis on Movies from 2011 - 2021')
    st.markdown(
        f"<p><strong>Disclaimer: </strong><em> This web application was created by Dustin Reyes. </strong></em>",
        unsafe_allow_html=True)
    # st.write("This page incorporates analysis on movies from 2011 to 2021 and dashboards to visualize the insights that were observed.")
    st.markdown(
        """<p align="justify"><em>This page incorporates analysis on movies from 2011 to 2021 and dashboards to visualize the insights that were observed. It must be noted that movies with complete information, released in theaters and with reliable sources are only considered for this analysis.</em>""",
        unsafe_allow_html=True)
    # Work on copies so the module-level originals stay untouched.
    # df_movies = pd.read_csv('data/titles_complete_info.csv', usecols = cols)
    df_movies = df_movies_orig.copy()
    title_basics_df = title_basics_df_orig.copy()
    imdb_info_withbudget = imdb_info_withbudget_orig.copy()
    # Keep only rows with both gross and critic data, ordered by release date.
    df_movies.dropna(subset=['worldwide_gross', 'metacritic_score'],
                     inplace=True)
    df_movies.reset_index(drop=True, inplace=True)
    df_movies.sort_values(by='release', inplace=True)
    df_movies.reset_index(drop=True, inplace=True)
    # Human-readable column names used as labels throughout the page.
    df_movies.rename(
        {
            'worldwide_gross': 'Worldwide Gross',
            'metacritic_score': 'Metacritic Score',
            'budget': 'Budget',
            'opening': 'Opening',
            'gross': 'Gross',
            'runtimeMinutes': 'Runtime (Minutes)',
            'averageRating': 'Average Rating',
            'numVotes': 'Number of Votes'
        },
        axis=1,
        inplace=True)
    st.markdown(
        """<p align="justify"> A commercially successful movie not only provides entertainment to the audience but also enables film producers to generate significant profits. Several factors such as veteran actors, social media presence, popularity, and release time are important for profitability, but they do not always guarantee how a movie will have a great reception to the audience. In this page, we sought to understand temporal patterns affecting movie opening performance, see how popular genres change over years, see movie rankings based on chosen metrics, observe movie runtimes across different genres and observe changes in movie ratings and vote averages over time""",
        unsafe_allow_html=True)
    # st.write("See `apps/home.py` to know how to use it.")
    # Section I: Bokeh heatmap of mean opening gross by (year, month).
    st.markdown(f"<h2> I. Temporal Pattern of Movie Openings",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section aims to analyze the months wherein movies have the best opening performance. The analysis of temporal patterns across the years enables film makers to strategically release films on months wherein such movies are in demand""",
        unsafe_allow_html=True)
    df_movies['month'] = pd.DatetimeIndex(df_movies['release']).month
    opening_by_month_year = df_movies.groupby(["startYear", "month"
                                               ]).Opening.mean().reset_index()
    newdata = ColumnDataSource(opening_by_month_year)
    mapper = LinearColorMapper(palette=bokeh.palettes.RdBu[9],
                               low=opening_by_month_year["Opening"].min(),
                               high=opening_by_month_year["Opening"].max())
    hover = HoverTool(tooltips=[
        ("Opening", "@Opening{$,}"),
    ])
    TOOLS = [hover, "save,pan,box_zoom,reset,wheel_zoom"]
    p = figure(x_axis_label='Year',
               y_axis_label='Month',
               tools=TOOLS,
               plot_width=900)
    p.rect(x="startYear",
           y="month",
           width=1,
           height=1,
           source=newdata,
           fill_color={
               'field': 'Opening',
               'transform': mapper
           })
    color_bar = ColorBar(color_mapper=mapper,
                         location=(20, 0),
                         label_standoff=18,
                         ticker=AdaptiveTicker(),
                         formatter=NumeralTickFormatter(format="$,"))
    p.add_layout(color_bar, 'right')
    p.title.text = "Movie Opening Performance by Year and Month"
    p.title.align = "center"
    p.title.text_font_size = "20px"
    st.write(p)
    # Section II: Plotly ranking chart for a chosen metric and year.
    st.markdown(f"<h2> II. Movie Ranking Analysis", unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the rankings of movies per year based on the following criterias: <strong>Budget, Opening, Gross, Worldwide Gross, Metacritic Score, Runtime (Minutes), Average Rating, and Number of Votes</strong>. This section enables analysts to know what are the qualities and characteristics that movies that have appeared on these rankings have. """,
        unsafe_allow_html=True)
    years = []
    categories = [
        'Budget', 'Opening', 'Gross', 'Worldwide Gross', 'Metacritic Score',
        'Runtime (Minutes)', 'Average Rating', 'Number of Votes'
    ]
    for i in df_movies['startYear'].unique():
        years.append(i)
    option1 = st.selectbox('Pls select the category', categories)
    option2 = st.selectbox('Pls select the year', years)
    figure1 = movie_analyzer(df_movies, category=option1, year=option2)
    st.plotly_chart(figure1)
    # st.write('You selected:', option)
    # Section III: genre word cloud rendered with matplotlib.
    st.markdown(f"<h2> III. What are the Most Popular Movie Genres?",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the most popular genres as a WordCloud. The larger the font, the more frequently appearing the word is. From the WordCloud, we can observe that Action Movies were the most popular movie genres among film makers during the last 10 years.""",
        unsafe_allow_html=True)
    # Join the different processed abstracts together.
    colors = ["#BF0A30", "#002868"]
    cmap = LinearSegmentedColormap.from_list("mycmap", colors)
    long_string = ' '.join(df_movies['genres'].values.tolist())
    # Create a WordCloud object
    wordcloud = WordCloud(background_color="white",
                          colormap=cmap,
                          width=1000,
                          height=300,
                          max_font_size=500,
                          relative_scaling=0.3,
                          min_font_size=5)
    # Generate a word cloud
    wordcloud = wordcloud.generate(long_string)
    # Visualize the word cloud
    plt.figure(figsize=(100, 100))
    fig_cld, axes_cld = plt.subplots(1, 1)
    axes_cld.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    st.pyplot(fig_cld)
    # Section IV: Plotly runtime-outlier chart for a chosen genre.
    st.markdown(f"<h2> IV. Movie Runtimes per Genre Analysis",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes movie runtimes per genre. It is important that we identify the characterictics of movies whose runtimes are not normal as these may or may not affect viewership of the said movie. It is also quite possible that these films are experimental in nature and that the director mainly created the movie for test subjects.""",
        unsafe_allow_html=True)
    genres = title_basics_df['genres'].unique().tolist()
    genres.append('All')
    option3 = st.slider('Pls. choose the number of movies to consider?', 2, 20,
                        10)
    option4 = st.selectbox('Pls select the genre', genres)
    figure2 = runtimemovie_analyzer(title_basics_df,
                                    number=option3,
                                    genre=option4)
    st.plotly_chart(figure2)
    # Section V: per-genre performance over the years for a chosen metric.
    st.markdown(f"<h2> V. Performance for each Genre Across the Years",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section aims to visualize the different performance of each genre based on metrics (opening, gross and worldwide gross) across the years 2011 to 2021. """,
        unsafe_allow_html=True)
    categories = ['Opening', 'Gross', 'Worldwide Gross']
    option5 = st.selectbox('Pls select the category', categories)
    figure3 = genre_opening_analyzer(df_movies, category=option5)
    st.plotly_chart(figure3)
    # Section VI: seaborn box plot of budgets per genre.
    st.markdown(f"<h2> VI. Average Budget per Genre", unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the average budget per genre across the available data. From the visualization, we can observe that the Action genre has average budgets that were considered as outliers through all the average budgets across genres. Meanwhile, other genres usually have lower budget allocations when being made and such genres include horror, drama, documentaries, comedies. """,
        unsafe_allow_html=True)
    fig = plt.figure(figsize=(15, 10))
    # fliersize is the size of outlier markers
    g = sns.boxplot(x='genres',
                    y='budget',
                    data=imdb_info_withbudget,
                    palette="Set2",
                    linewidth=1,
                    fliersize=1.5)
    g.set(title='Average Budget per Genre',
          ylabel="Average Budget ($M)",
          xlabel="")
    # put a horizontal line on overall mean
    plt.axhline(imdb_info_withbudget.budget.mean(), ls='--', lw=1,
                color='black')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    #fig.savefig("filename.png")
    st.pyplot(fig)