df.columns = ["Index", "EI", "id", "x", "y"] x = df['x'].values y = df['y'].values EIvals = df['EI'].values psource = ColumnDataSource(data=dict( x=x, y=y, EIvals=EIvals, )) return psource source = read_data() # low=math.floor(np.min(df['EI'].values)) # high=math.ceil(np.max(df['EI'].values)) mapper = LinearColorMapper(palette=Viridis256, low=0, high=1) color_bar = ColorBar(color_mapper=mapper, location=(0, 0)) app = Flask(__name__) posts_data = [ { 'bug': 'Red Mite', 'predator': 'Birdy Bird', 'crops_affected': "really can't say", 'ei_val': '-999999' } #, # { # 'author': 'Jane Doe', # 'title': 'Blog Post 2', # 'content': 'Second post content',
def parallel_plot(df, color=None, palette=None):
    """Build an interactive parallel-coordinates figure from a dataframe.

    Every row of ``df`` becomes one poly-line crossing one vertical axis per
    column. Line heights are min-max normalised per column, and each column
    gets its own tick axis expressed in the column's original units.

    Args:
        df: dataframe whose columns become the parallel axes.
        color: per-row values mapped to colours for selected lines; when
            omitted an array of ones is used.
        palette: list of colours handed to the colour mapper; defaults to a
            single red.

    Returns:
        The configured figure, with the parallel selection tool set as the
        active drag tool.
    """
    n_rows = df.shape[0]
    n_axes = len(df.columns)

    color = np.ones(n_rows) if color is None else color
    palette = ['#ff0000'] if palette is None else palette

    # NOTE(review): ``high`` receives the minimum and ``low`` the maximum.
    # Kept exactly as written; confirm the reversed direction is intended.
    cmap = LinearColorMapper(high=color.min(),
                             low=color.max(),
                             palette=palette)

    # One x position per column, repeated for every row.
    line_xs = np.tile(np.arange(n_axes), (n_rows, 1)).tolist()
    # Rescale every column to [0, 1] so all axes share the same height.
    col_min = df.min()
    col_max = df.max()
    line_ys = np.array((df - col_min) / (col_max - col_min)).tolist()
    data_source = ColumnDataSource(dict(xs=line_xs, ys=line_ys, color=color))

    p = figure(x_range=(-1, n_axes),
               y_range=(0, 1),
               width=1000,
               tools="pan, box_zoom")

    # x axis: one tick per column, labelled with the column name
    p.xaxis.ticker = FixedTicker(ticks=np.arange(n_axes), minor_ticks=[])
    p.xaxis.formatter = FuncTickFormatter(code="return columns[index]",
                                          args={"columns": df.columns})

    p.yaxis.visible = False
    p.y_range.start = 0
    p.y_range.end = 1
    p.y_range.bounds = (-0.1, 1.1)  # a little padding around the y axis
    p.xgrid.visible = False
    p.ygrid.visible = False

    # Padding of the normalised range, reused to pad every per-column range.
    pad_below = p.y_range.bounds[0] - p.y_range.start
    pad_above = p.y_range.bounds[1] - p.y_range.end

    # One extra y axis per column, placed at that column's x position
    tickformatter = BasicTickFormatter(precision=1)
    for position, name in enumerate(df.columns):
        lowest = df[name].min()
        highest = df[name].max()
        span = abs(highest - lowest)
        bound_min = lowest + span * pad_below
        bound_max = highest + span * pad_above
        p.extra_y_ranges.update({
            name: Range1d(start=bound_min,
                          end=bound_max,
                          bounds=(bound_min, bound_max))
        })

        column_axis = LinearAxis(
            fixed_location=position,
            y_range_name=name,
            ticker=FixedTicker(ticks=np.linspace(lowest, highest, 8),
                               minor_ticks=[]),
            formatter=tickformatter)
        p.add_layout(column_axis, 'right')

    # Data renderer (MultiLine): faint grey by default, colour-mapped when
    # selected.
    faded_style = {
        'line_color': 'grey',
        'line_width': 0.1,
        'line_alpha': 0.5,
    }
    highlighted_style = {
        'line_color': {'field': 'color', 'transform': cmap},
        'line_width': 1,
    }

    parallel_renderer = p.multi_line(xs="xs",
                                     ys="ys",
                                     source=data_source,
                                     **faded_style)
    parallel_renderer.selection_glyph = MultiLine(**highlighted_style)
    parallel_renderer.nonselection_glyph = MultiLine(**faded_style)

    # Open the view up to the padded bounds now that the axes are built.
    p.y_range.start = p.y_range.bounds[0]
    p.y_range.end = p.y_range.bounds[1]

    # Rectangles drawn by the selection tool live in their own data source.
    rect_source = ColumnDataSource(dict(x=[], y=[], width=[], height=[]))
    selection_renderer = p.rect(x='x',
                                y='y',
                                width='width',
                                height='height',
                                source=rect_source,
                                fill_alpha=0.7,
                                fill_color='#009933')
    selection_tool = ParallelSelectionTool(
        renderer_select=selection_renderer,
        renderer_data=parallel_renderer,
        box_width=10)

    # custom reset: resets only the axes, not the selections
    reset_axes = ParallelResetTool()

    p.add_tools(selection_tool, reset_axes)
    p.toolbar.active_drag = selection_tool
    return p
# Entries 56-69 and 88-101 are moved onto their own rows, labelled 'La' and
# 'Ac', and numbered consecutively starting from group 4.
for series_label, first, stop in (('La', 56, 70), ('Ac', 88, 102)):
    count = 0
    for i in range(first, stop):
        elements.period[i] = series_label
        elements.group[i] = str(count + 4)
        count += 1

# Define matplotlib and bokeh color map
if log_scale == 0:
    color_mapper = LinearColorMapper(palette=bokeh_palette,
                                     low=min(data),
                                     high=max(data))
    norm = Normalize(vmin=min(data), vmax=max(data))
elif log_scale == 1:
    for datum in data:
        if datum < 0:
            # str() is required: concatenating the numeric value directly
            # raised TypeError and masked this ValueError.
            raise ValueError('Entry for element ' + str(datum) +
                             ' is negative but'
                             ' log-scale is selected')
    color_mapper = LogColorMapper(palette=bokeh_palette,
                                  low=min(data),
                                  high=max(data))
    norm = LogNorm(vmin=min(data), vmax=max(data))
# RGBA colour for every data value, using the normalisation chosen above
color_scale = ScalarMappable(norm=norm, cmap=cmap).to_rgba(data, alpha=None)

# Define color for blank entries
blank_color = '#c4c4c4'
def app():
    """Render the Streamlit page "Analysis on Movies from 2011 - 2021".

    The page is built top to bottom from six sections: an opening-revenue
    heatmap by year and month (Bokeh), a per-year movie ranking chart, a
    genre word cloud, a runtime-per-genre chart, a per-genre performance
    chart, and a budget-per-genre box plot. The data comes from frames
    defined outside this function, which are copied before being modified.
    """

    def html(markup):
        # Every text block on this page is raw HTML passed through markdown.
        st.markdown(markup, unsafe_allow_html=True)

    st.title('Analysis on Movies from 2011 - 2021')
    html("<p><strong>Disclaimer: </strong><em> This web application was created by Dustin Reyes. </strong></em>")
    html("""<p align="justify"><em>This page incorporates analysis on movies from 2011 to 2021 and dashboards to visualize the insights that were observed. It must be noted that movies with complete information, released in theaters and with reliable sources are only considered for this analysis.</em>""")

    # Work on copies so the source frames are left untouched by the in-place
    # operations below.
    df_movies = df_movies_orig.copy()
    title_basics_df = title_basics_df_orig.copy()
    imdb_info_withbudget = imdb_info_withbudget_orig.copy()

    # Keep only movies with both a worldwide gross and a metacritic score,
    # ordered by release date with a fresh 0..n-1 index.
    df_movies.dropna(subset=['worldwide_gross', 'metacritic_score'],
                     inplace=True)
    df_movies.sort_values(by='release', inplace=True)
    df_movies.reset_index(drop=True, inplace=True)
    # Display-friendly column names; the selectboxes below use these labels.
    df_movies.rename(columns={
        'worldwide_gross': 'Worldwide Gross',
        'metacritic_score': 'Metacritic Score',
        'budget': 'Budget',
        'opening': 'Opening',
        'gross': 'Gross',
        'runtimeMinutes': 'Runtime (Minutes)',
        'averageRating': 'Average Rating',
        'numVotes': 'Number of Votes',
    }, inplace=True)

    html("""<p align="justify"> A commercially successful movie not only provides entertainment to the audience but also enables film producers to generate significant profits. Several factors such as veteran actors, social media presence, popularity, and release time are important for profitability, but they do not always guarantee how a movie will have a great reception to the audience. In this page, we sought to understand temporal patterns affecting movie opening performance, see how popular genres change over years, see movie rankings based on chosen metrics, observe movie runtimes across different genres and observe changes in movie ratings and vote averages over time""")

    # ---- I. Heatmap of mean opening revenue per (year, month) ----
    html("<h2> I. Temporal Pattern of Movie Openings")
    html("""<p align="justify">This section aims to analyze the months wherein movies have the best opening performance. The analysis of temporal patterns across the years enables film makers to strategically release films on months wherein such movies are in demand""")

    df_movies['month'] = pd.DatetimeIndex(df_movies['release']).month
    opening_by_month_year = df_movies.groupby(
        ["startYear", "month"]).Opening.mean().reset_index()

    newdata = ColumnDataSource(opening_by_month_year)
    mapper = LinearColorMapper(palette=bokeh.palettes.RdBu[9],
                               low=opening_by_month_year["Opening"].min(),
                               high=opening_by_month_year["Opening"].max())
    hover = HoverTool(tooltips=[
        ("Opening", "@Opening{$,}"),
    ])
    TOOLS = [hover, "save,pan,box_zoom,reset,wheel_zoom"]
    p = figure(x_axis_label='Year',
               y_axis_label='Month',
               tools=TOOLS,
               plot_width=900)
    # One unit square per (year, month) cell, coloured by mean opening.
    p.rect(x="startYear",
           y="month",
           width=1,
           height=1,
           source=newdata,
           fill_color={
               'field': 'Opening',
               'transform': mapper
           })
    color_bar = ColorBar(color_mapper=mapper,
                         location=(20, 0),
                         label_standoff=18,
                         ticker=AdaptiveTicker(),
                         formatter=NumeralTickFormatter(format="$,"))
    p.add_layout(color_bar, 'right')
    p.title.text = "Movie Opening Performance by Year and Month"
    p.title.align = "center"
    p.title.text_font_size = "20px"
    st.write(p)

    # ---- II. Ranking of movies for a chosen metric and year ----
    html("<h2> II. Movie Ranking Analysis")
    html("""<p align="justify">This section visualizes the rankings of movies per year based on the following criterias: <strong>Budget, Opening, Gross, Worldwide Gross, Metacritic Score, Runtime (Minutes), Average Rating, and Number of Votes</strong>. This section enables analysts to know what are the qualities and characteristics that movies that have appeared on these rankings have. """)

    categories = [
        'Budget', 'Opening', 'Gross', 'Worldwide Gross', 'Metacritic Score',
        'Runtime (Minutes)', 'Average Rating', 'Number of Votes'
    ]
    years = list(df_movies['startYear'].unique())
    option1 = st.selectbox('Pls select the category', categories)
    option2 = st.selectbox('Pls select the year', years)
    figure1 = movie_analyzer(df_movies, category=option1, year=option2)
    st.plotly_chart(figure1)

    # ---- III. Word cloud of genres ----
    html("<h2> III. What are the Most Popular Movie Genres?")
    html("""<p align="justify">This section visualizes the most popular genres as a WordCloud. The larger the font, the more frequently appearing the word is. From the WordCloud, we can observe that Action Movies were the most popular movie genres among film makers during the last 10 years.""")

    colors = ["#BF0A30", "#002868"]
    cmap = LinearSegmentedColormap.from_list("mycmap", colors)
    # Join every movie's genre text into one string for the word cloud.
    long_string = ' '.join(df_movies['genres'].values.tolist())
    wordcloud = WordCloud(background_color="white",
                          colormap=cmap,
                          width=1000,
                          height=300,
                          max_font_size=500,
                          relative_scaling=0.3,
                          min_font_size=5)
    wordcloud = wordcloud.generate(long_string)

    # Draw on a single figure. The previous extra
    # plt.figure(figsize=(100, 100)) call created a second, never-used
    # figure on every rerun and has been removed.
    fig_cld, axes_cld = plt.subplots(1, 1)
    axes_cld.imshow(wordcloud, interpolation="bilinear")
    axes_cld.axis("off")
    st.pyplot(fig_cld)

    # ---- IV. Runtimes per genre ----
    html("<h2> IV. Movie Runtimes per Genre Analysis")
    html("""<p align="justify">This section visualizes movie runtimes per genre. It is important that we identify the characterictics of movies whose runtimes are not normal as these may or may not affect viewership of the said movie. It is also quite possible that these films are experimental in nature and that the director mainly created the movie for test subjects.""")

    genres = title_basics_df['genres'].unique().tolist()
    genres.append('All')
    option3 = st.slider('Pls. choose the number of movies to consider?', 2, 20,
                        10)
    option4 = st.selectbox('Pls select the genre', genres)
    figure2 = runtimemovie_analyzer(title_basics_df,
                                    number=option3,
                                    genre=option4)
    st.plotly_chart(figure2)

    # ---- V. Genre performance across the years ----
    html("<h2> V. Performance for each Genre Across the Years")
    html("""<p align="justify">This section aims to visualize the different performance of each genre based on metrics (opening, gross and worldwide gross) across the years 2011 to 2021. """)

    categories = ['Opening', 'Gross', 'Worldwide Gross']
    option5 = st.selectbox('Pls select the category', categories)
    figure3 = genre_opening_analyzer(df_movies, category=option5)
    st.plotly_chart(figure3)

    # ---- VI. Budget distribution per genre ----
    html("<h2> VI. Average Budget per Genre")
    html("""<p align="justify">This section visualizes the average budget per genre across the available data. From the visualization, we can observe that the Action genre has average budgets that were considered as outliers through all the average budgets across genres. Meanwhile, other genres usually have lower budget allocations when being made and such genres include horror, drama, documentaries, comedies. """)

    fig = plt.figure(figsize=(15, 10))
    # fliersize is the size of the outlier markers
    g = sns.boxplot(x='genres',
                    y='budget',
                    data=imdb_info_withbudget,
                    palette="Set2",
                    linewidth=1,
                    fliersize=1.5)
    g.set(title='Average Budget per Genre',
          ylabel="Average Budget ($M)",
          xlabel="")
    # dashed horizontal line at the overall mean budget
    plt.axhline(imdb_info_withbudget.budget.mean(),
                ls='--',
                lw=1,
                color='black')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    st.pyplot(fig)
def bokeh_map():
    """Create a choropleth map with a time slider using the Bokeh library.

    The map is built from a geopandas DataFrame that carries a geometry
    column plus one column per month (see below). It was made for disease
    incidence visualisation; for other uses (elections, sell-buy, etc.) the
    ``high`` value of the colour mapper should be adapted and ``shp_data``
    shaped like the following:

    index  Municipality      2004-01  2004-02  2004-3  ...  Geometry
    0      Eindhoven         2        5        12           POLYGON()
    1      Breda             8        14       9            POLYGON()
    2      's Hertogenbosch  0        0        19           POLYGON()

    The time slider reads from the columns 2004-01, 2004-02, etc.

    The finished layout (map + slider) is added to the current Bokeh
    document; nothing is returned.
    """
    disease = input('What disease would you like to Visualize? ')

    # Data Preprocessing
    all_df = disease_studied()
    data = MonthlyTransform(all_df)
    data.find_mun()
    longi, lati = data.center_of_mass()
    # The find_mun method needs to be reused after the center_of_mass method
    data.find_mun()
    data = data.monthly_municipality()
    shp_data = make_df_shapefile(data)
    # Every column except the label and the geometry is a time step.
    column_list = [
        col for col in shp_data.columns.tolist()
        if col not in ('Municipality', 'geometry')
    ]
    last_index = len(column_list) - 1

    # Main Body
    # The upper end of the colour scale is lowered to make the map more
    # readable, since the maximum values only correspond to a few
    # municipalities.
    if disease == 'Kinkhoest':
        high = shp_data['2012-04'].max()
    else:
        high = shp_data[column_list].max().max() - 3

    TOOLS = "pan,wheel_zoom,reset,hover,save"
    # This will be the default column to start the visualization.
    # NOTE(review): hard-coded, while the slider and marker start at
    # column_list[0]; the two only agree if that column is '2010-01'.
    color_column = '2010-01'
    geojson = shp_data.to_json()

    def slider_title(n):
        return 'Number of Incidences in ' + column_list[n]

    def hover_tooltips(column_name):
        return [
            ("Municipality", "@Municipality"),
            ("Incidences", "@{" + column_name + "}"),
        ]

    # Initial position of the time slider
    N = 0
    mapper = LinearColorMapper(palette=Magma256[::-1], low=0, high=high)
    geo_source = GeoJSONDataSource(geojson=geojson)
    p = figure(tools=TOOLS,
               toolbar_sticky=False,
               plot_width=600,
               plot_height=285)
    patches_renderer = p.patches('xs',
                                 'ys',
                                 fill_alpha=0.7,
                                 fill_color={
                                     'field': color_column,
                                     'transform': mapper
                                 },
                                 line_color='black',
                                 line_width=0.2,
                                 source=geo_source)
    center_of_mass = p.circle(
        x=longi[N],
        y=lati[N],
        color="#1a5921",
    )

    # Hide the axes and grid lines to improve the map's appearance.
    p.xaxis.visible = False
    p.yaxis.visible = False
    p.xgrid.visible = False
    p.ygrid.visible = False

    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = hover_tooltips(color_column)

    color_bar = ColorBar(color_mapper=mapper,
                         label_standoff=12,
                         border_line_color=None,
                         location=(0, 0))
    p.add_layout(color_bar, 'right')

    # The slider deliberately runs past the last column (the original author
    # observed buggy behaviour otherwise); the callback clamps the index.
    slider = Slider(start=0,
                    end=len(column_list) + 2,
                    value=N,
                    step=1,
                    width=515,
                    show_value=False,
                    tooltips=False,
                    title=slider_title(N))

    def callback(attr, old, new):
        # Positions beyond the last column used to raise IndexError here;
        # treat them as the last available time step instead.
        step = min(new, last_index)
        selected_column = column_list[step]

        slider.title = slider_title(step)
        glyph = patches_renderer.glyph
        glyph.fill_color = {**glyph.fill_color, 'field': selected_column}
        marker = center_of_mass.glyph
        marker.x = longi[step]
        marker.y = lati[step]
        hover.point_policy = "follow_mouse"
        hover.tooltips = hover_tooltips(selected_column)

    slider.on_change('value', callback)
    layout = column(p, slider)
    curdoc().add_root(layout)
normalizedTotalDeathsPerMillion()
# Left join: every geometry row is kept, with covid columns attached where
# the country code matches the ISO code.
geoDataFrame = geoDataFrame.merge(
    covidDataFrame,
    how='left',
    left_on='country_code',
    right_on='iso_code',
)
# datetime values cannot be written to JSON, so store the dates as strings
geoDataFrame['date'] = geoDataFrame['date'].astype(str)
jsonGeoData = json.loads(geoDataFrame.to_json())
source = GeoJSONDataSource(geojson=json.dumps(jsonGeoData))

# Sequential multi-hue palette, reversed so that dark blue is the highest
# value.
palette = brewer['YlGnBu'][8][::-1]
color_mapper = LinearColorMapper(palette=palette, low=0, high=8)

# Horizontal colour bar for the mapper above.
color_bar = ColorBar(
    color_mapper=color_mapper,
    orientation='horizontal',
    location=(0, 0),
    width=500,
    height=20,
    label_standoff=8,
    border_line_color=None,
)
plot = figure(plot_width=1000, plot_height=600, toolbar_location=None)