def update_scatter_c1(hoverData, var1, var2, type):
    """Build the var1-vs-var2 scatter plot for the selected granularity.

    Both variables are aggregated at the requested granularity, joined on the
    area key, and plotted with an OLS trendline. The regression summary is
    printed to stdout.

    Args:
        hoverData: Hover payload of the triggering component. It is not used
            to build the figure; it is kept so the signature is unchanged.
        var1: Name of the variable plotted on the x axis.
        var2: Name of the variable plotted on the y axis.
        type: ``'Precincts'`` for precinct-level data; any other value selects
            zipcode-level data. (The name shadows the builtin but is part of
            the existing interface.)

    Returns:
        The Plotly Express scatter figure.
    """
    if type == 'Precincts':
        granularity, key_column = 'Precincts', 'PrecinctKey'
    else:  # zipcode mappings
        granularity, key_column = 'Zipcodes', 'zipcode'

    x_frame = aggregate(granularity, var1, resultscatalog)[[key_column, var1]]
    y_frame = aggregate(granularity, var2, resultscatalog)[[key_column, var2]]
    df = x_frame.merge(y_frame, on=key_column)

    scat = px.scatter(df, x=var1, y=var2, trendline='ols',
                      hover_data=[key_column])
    scat.update_layout(xaxis_title=var1, yaxis_title=var2)

    results = px.get_trendline_results(scat)
    print(results.px_fit_results.iloc[0].summary())
    return scat
def test_trendline_results_passthrough(mode, options):
    """Check trendline traces and fit results for a per-country scatter."""
    oceania = px.data.gapminder().query("continent == 'Oceania'")
    fig = px.scatter(
        oceania,
        x="year",
        y="pop",
        color="country",
        trendline=mode,
        trendline_options=options,
    )
    assert len(fig.data) == 4

    # Traces alternate: a marker trace, then the trendline fitted to it.
    for marker_trace in fig["data"][0::2]:
        assert "trendline" not in marker_trace.hovertemplate
    for fit_trace in fig["data"][1::2]:
        template = fit_trace.hovertemplate
        assert "trendline" in template
        if mode == "ols":
            assert "R<sup>2</sup>" in template

    results = px.get_trendline_results(fig)
    if mode != "ols":
        # Only OLS fits are expected to expose result objects.
        assert len(results) == 0
        return

    assert len(results) == 2
    assert results["country"].values[0] == "Australia"
    first_fit = results["px_fit_results"].values[0]
    assert len(first_fit.params) == 2
def update_capm_plot(logreturns):
    """Plot each series against IBOV with an OLS fit and tabulate the fits.

    ``logreturns`` is turned into a DataFrame, joined with the month-start
    sums of the module-level ``r_ibov`` series, and melted so every remaining
    column becomes one facet.

    Returns:
        A tuple ``(fig, records)`` where ``records`` is a list of dicts with
        the keys ``ticker``, ``beta`` (``params[1]``) and ``alpha``
        (``params[0]``) of each facet's fit.
    """
    returns = pd.DataFrame(logreturns)
    returns = returns.assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    benchmark = r_ibov.resample('MS').sum().reset_index()
    df = pd.merge(returns, benchmark).set_index('Date').melt('IBOV')

    axis_labels = {
        'value': 'Retorno excedente',
        'IBOV': 'Retorno excedente IBOV',
    }
    fig = px.scatter(
        df,
        x='IBOV',
        y='value',
        trendline="ols",
        facet_col='variable',
        facet_col_wrap=4,
        opacity=.5,
        labels=axis_labels,
    )
    fig.update_yaxes(matches=None, showticklabels=False)
    fig.update_xaxes(showticklabels=False)
    # Only the trendline traces are drawn in "lines" mode.
    fig.update_traces(line=dict(dash="dot"),
                      selector=dict(type="scatter", mode="lines"))

    results = px.get_trendline_results(fig)
    fits = results['px_fit_results']
    results['beta'] = fits.apply(lambda fit: fit.params[1])
    results['alpha'] = fits.apply(lambda fit: fit.params[0])
    results = results.reset_index().rename(columns={'variable': 'ticker'})
    results = results[['ticker', 'beta', 'alpha']]

    def _bold_facet_title(annotation):
        # Facet titles read "variable=<name>"; keep only the name, in bold.
        return annotation.update(
            text='<b>' + annotation.text.split("=")[-1] + '</b>')

    fig.for_each_annotation(_bold_facet_title)
    return fig, results.to_dict('records')
def test_ols_trendline_slopes():
    """Check the OLS hover equation and fitted params for small inputs."""
    # Each case: (x, y, trendline options or None, expected hovertemplate
    # fragment, expected params or None when the params are not checked).
    cases = [
        # should be "y = 1 * x + 0" but sometimes is some tiny number instead
        ([0, 1], [0, 1], None, "y = 1 * x + ", [0, 1]),
        ([0, 1], [1, 2], None, "y = 1 * x + 1<br>", [1, 1]),
        ([0, 1], [1, 2], dict(add_constant=False), "y = 2 * x<br>", [2]),
        ([1, 1], [0, 0], dict(add_constant=False), "y = 0 * x<br>", [0]),
        ([1, 1], [0, 0], None, "y = 0<br>", [0]),
        ([1, 2], [0, 0], None, "y = 0 * x + 0<br>", None),
        ([0, 0], [1, 1], None, "y = 0 * x + 1<br>", None),
        ([0, 0], [1, 2], None, "y = 0 * x + 1.5<br>", None),
    ]

    for xs, ys, options, fragment, expected_params in cases:
        # Only forward trendline_options when the case specifies them.
        extra = {} if options is None else {"trendline_options": options}
        fig = px.scatter(x=xs, y=ys, trendline="ols", **extra)
        assert fragment in fig.data[1].hovertemplate

        if expected_params is None:
            continue
        results = px.get_trendline_results(fig)
        params = results["px_fit_results"].iloc[0].params
        assert np.all(np.isclose(params, expected_params))
def test_no_slope_ols_trendline():
    """Check the OLS hover equation when x or y carries no variation."""
    # Each case: (x, y, expected hovertemplate fragment, expected params or
    # None when the params are not checked).
    cases = [
        # "y = 1", then + x*(some small number)
        ([0, 1], [0, 1], "y = 1", [0, 1]),
        ([1, 1], [0, 0], "y = 0", [0]),
        ([1, 2], [0, 0], "y = 0", None),
        ([0, 0], [1, 1], "y = 0 * x + 1", None),
        ([0, 0], [1, 2], "y = 0 * x + 1.5", None),
    ]

    for xs, ys, fragment, expected_params in cases:
        fig = px.scatter(x=xs, y=ys, trendline="ols")
        assert fragment in fig.data[1].hovertemplate

        if expected_params is None:
            continue
        results = px.get_trendline_results(fig)
        params = results["px_fit_results"].iloc[0].params
        assert np.all(np.isclose(params, expected_params))
def annual_subst_complaints_vs_prop_demo_reg(df, start, stop, figno, demo, ign_pcts=[]):
    """Plot per-precinct 'excess' substantiated complaints vs. a demographic share.

    'Excess' is the observed mean annual substantiated complaints minus the
    value predicted from mean annual crime reports with the module-level
    coefficients ``cb0s`` (intercept) and ``cb1s`` (slope). Those globals must
    be set before this function is called. The figure is shown as a side
    effect.

    Args:
        df: Per-precinct frame with ``Precinct``,
            ``2010_Percent_{demo}_Residents``, ``Annual_Mean_Substantiated``
            and ``Annual_Mean_Crime_Reports`` columns.
        start: First year, used only in the figure title.
        stop: Last year, used only in the figure title.
        figno: Figure label; capitalized for the title.
        demo: Demographic name substituted into the column name and labels.
        ign_pcts: Accepted for signature parity; not used by this function.

    Returns:
        Tuple of the prepared DataFrame (rows without a demographic value
        dropped, columns renamed and derived columns added) and the
        statsmodels summary of the first OLS fit.
    """
    source_demo_col = f"2010_Percent_{demo}_Residents"
    demo_col = f"2010 Percent {demo} Residents"
    observed_col = "Mean Annual Substantiated Misconduct Complaints"
    excess_col = "Mean Annual 'Excess' Substantiated Complaints"

    # Filter first and copy the result: copying before the filter left the
    # column assignment below writing into a slice (SettingWithCopyWarning).
    df = df[df[source_demo_col].notna()].copy()
    df["Precinct"] = df["Precinct"].astype(int)
    df = df.rename(columns={
        source_demo_col: demo_col,
        "Annual_Mean_Substantiated": observed_col,
    })
    df["Annual_Mean_Substantiated_Pred"] = cb0s + cb1s * df["Annual_Mean_Crime_Reports"]
    df[excess_col] = df[observed_col] - df["Annual_Mean_Substantiated_Pred"]

    shapes = seaborn_conf_int(df, demo_col, excess_col)
    fig = px.scatter(df,
                     x=df[demo_col],
                     y=df[excess_col],
                     color=df.Precinct,
                     text=df.Precinct,
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)
    fig.update_xaxes(
        title_text=f"<span style='font-size: 12px;'>Percent {demo} Residents (2010 U.S. Census)</span>"
    )
    fig.update_yaxes(
        title_text="<span style='font-size: 12px;'>Mean Annual Number of 'Excess' Substantiated Misconduct Complaints</span>"
    )
    fig.update_layout(
        title={
            'text': f"<b>Figure {figno.capitalize()}</b>: Per-Precinct Mean Annual 'Excess' Substantiated Misconduct Complaints vs. Percent {demo} Residents ({start}-{stop})",
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        })
    fig.show()

    results = px.get_trendline_results(fig)
    return df, results.px_fit_results.iloc[0].summary()
def test_overall_trendline():
    """An overall-scope trendline must match the plain single-trace fit."""
    tips = px.data.tips()

    def first_fit_params(fig):
        # Parameters of the first fit reported for the figure.
        return px.get_trendline_results(fig)["px_fit_results"].iloc[0].params

    plain = px.scatter(tips, x="total_bill", y="tip", trendline="ols")
    assert len(plain.data) == 2
    assert "trendline" in plain.data[1].hovertemplate
    plain_params = first_fit_params(plain)

    by_color = px.scatter(
        tips,
        x="total_bill",
        y="tip",
        color="sex",
        trendline="ols",
        trendline_scope="overall",
    )
    assert len(by_color.data) == 3
    assert "trendline" in by_color.data[2].hovertemplate
    by_color_params = first_fit_params(by_color)
    assert np.all(np.array_equal(plain_params, by_color_params))

    by_facet = px.scatter(
        tips,
        x="total_bill",
        y="tip",
        facet_row="sex",
        trendline="ols",
        trendline_scope="overall",
    )
    assert len(by_facet.data) == 4
    assert "trendline" in by_facet.data[3].hovertemplate
    by_facet_params = first_fit_params(by_facet)
    assert np.all(np.array_equal(plain_params, by_facet_params))
def make_plot(df, x_axis, y_axis):
    """Render a scatter plot with an OLS trendline and its statistics.

    The chart, the fitted line equation, R-squared and the correlation of the
    two columns are written to the Streamlit page.

    Args:
        df: DataFrame holding ``x_axis``, ``y_axis`` and a ``date`` column
            (``date`` is shown in the hover data).
        x_axis: Name of the column plotted on the x axis.
        y_axis: Name of the column plotted on the y axis.
    """
    fig = px.scatter(df,
                     x=x_axis,
                     y=y_axis,
                     trendline="ols",
                     hover_data=["date", x_axis, y_axis])
    st.plotly_chart(fig, use_container_width=True)

    fit = px.get_trendline_results(fig).iloc[0]["px_fit_results"]
    # The constant term comes first in the fit: params[0] is the intercept
    # and params[1] the slope. The equation previously had them swapped.
    intercept = fit.params[0]
    slope = fit.params[1]
    st.write(f"y = {round(slope,4)} *x + {round(intercept,4)}")
    st.write(f"R2 = {fit.rsquared}")

    try:
        c = round(df[x_axis].corr(df[y_axis]), 3)
    except Exception:
        # Best effort: columns that cannot be correlated show a placeholder.
        st.write("_")
    else:
        st.write(f"Correlatie {x_axis} vs {y_axis}= {c}")
def scatterPlot(self, df, c):
    """Create and show a scatter plot between two variables

    Values above a column's 95th percentile and zeros are discarded, rows
    with any missing value are dropped, and the y values are averaged per
    distinct x value before plotting. The OLS summary is printed.

    Params
        df: data with values to graph (pd.DataFrame)
        c: the two columns to plot, x first and y second (string list)
    Return: NA
    """
    x_name, y_name = c

    def _without_upper_tail(values):
        # Values outside [0th, 95th] percentile become NaN (index is kept).
        in_range = values.between(values.quantile(.0), values.quantile(.95))
        return values.where(in_range)

    # Subset first so only the two needed columns are materialised; `apply`
    # returns a new frame, so the caller's data is never modified.
    local_df = df[c].apply(_without_upper_tail)
    local_df = local_df.replace(0, np.nan).dropna()  # remove 0s and gaps

    # Mean of y per distinct x, in order of first appearance of each x.
    averaged = (
        local_df.groupby(x_name, sort=False, as_index=False)[y_name]
        .mean()
        .astype(float)
    )

    # create and show plot
    fig = px.scatter(averaged, x=c[0], y=c[1], trendline="ols")
    fig.show()

    # get trend line information
    results = px.get_trendline_results(fig)
    summary = results.px_fit_results.iloc[0].summary()
    print(summary)
def annual_subst_complaints_vs_complaints_per_officer_reg(
        df, start, stop, figno, ign_pcts=[]):
    """Plot 'excess' substantiated complaints vs. complaints per accused officer.

    'Excess' is the observed mean annual substantiated complaints minus the
    prediction ``cb0s + cb1s * Annual_Mean_Crime_Reports`` built from the
    module-level coefficients. The figure is shown as a side effect;
    ``ign_pcts`` is not used by this function.

    Returns:
        Tuple of the prepared DataFrame copy and the statsmodels summary of
        the first OLS fit.
    """
    per_officer_col = "Mean Substantiated Complaints Per Accused Officer"
    observed_col = "Mean Annual Substantiated Misconduct Complaints"
    predicted_col = "Annual_Mean_Substantiated_Pred"
    excess_col = "Mean Annual 'Excess' Substantiated Complaints"

    df = df.copy()
    df = df.rename(columns={
        "Mean_Substantiated_per_Officer": per_officer_col,
        "Annual_Mean_Substantiated": observed_col,
    })
    df[predicted_col] = cb0s + cb1s * df["Annual_Mean_Crime_Reports"]
    df[excess_col] = df[observed_col] - df[predicted_col]

    shapes = seaborn_conf_int(df, per_officer_col, excess_col)
    fig = px.scatter(df,
                     x=df[per_officer_col],
                     y=df[excess_col],
                     text=df.Precinct,
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)

    x_title = "<span style='font-size: 12px;'>Mean Annual Number of Substantiated Misconduct Complaints Per Accused Officer</span>"
    y_title = "<span style='font-size: 12px;'>Mean Annual Number of 'Excess' Substantiated Misconduct Complaints</span>"
    fig.update_xaxes(title_text=x_title)
    fig.update_yaxes(title_text=y_title)

    title_text = f"<b>Figure {figno.capitalize()}</b>: Per-Precinct Mean Annual 'Excess' Substantiated Misconduct Complaints vs. Mean Substantiated Misconduct Complaints Per Accused Officer ({start}-{stop})"
    fig.update_layout(title={
        'text': title_text,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    })
    fig.show()

    first_fit = px.get_trendline_results(fig).px_fit_results.iloc[0]
    return df, first_fit.summary()
def annual_subst_complaints_vs_reported_crime_reg(df, start, stop, figno, ign_pcts=[]):
    """Plot substantiated complaints vs. reported crimes and store the fit.

    The figure is shown as a side effect. The two OLS parameters of the fit
    are also written to the module-level globals ``cb0s`` and ``cb1s``.
    ``ign_pcts`` is not used by this function.

    Returns:
        Tuple of the renamed DataFrame and the statsmodels summary of the
        first OLS fit.
    """
    global cb0s, cb1s

    crime_col = "Mean Annual Reported Crimes"
    complaints_col = "Mean Annual Substantiated Misconduct Complaints"

    df = df.rename(columns={
        "Annual_Mean_Crime_Reports": crime_col,
        "Annual_Mean_Substantiated": complaints_col,
    })
    shapes = seaborn_conf_int(df, crime_col, complaints_col)

    fig = px.scatter(df,
                     x=df[crime_col],
                     y=df[complaints_col],
                     text=df.Precinct,
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)

    x_title = "<span style='font-size: 12px;'>Mean Annual Number of Reported Crimes</span>"
    y_title = "<span style='font-size: 12px;'>Mean Annual Number of Substantiated Misconduct Complaints</span>"
    fig.update_xaxes(title_text=x_title)
    fig.update_yaxes(title_text=y_title)

    title_text = f"<b>Figure {figno.capitalize()}</b>: Per-Precinct Mean Annual Substantiated Misconduct Complaints vs. Mean Annual Reported Crimes ({start}-{stop})"
    fig.update_layout(title={
        'text': title_text,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    })
    fig.show()

    results = px.get_trendline_results(fig)
    # Publish the fit for the functions that compute "excess" complaints.
    cb0s, cb1s = results.px_fit_results.iloc[0].params
    return df, results.px_fit_results.iloc[0].summary()
def annual_subst_complaints_vs_officers_reg(dfa, start, stop, figno, ign_pcts=[]):
    """Plot yearly substantiated complaints against the yearly officer count.

    The figure is shown as a side effect.

    Args:
        dfa: Complaint-level frame with ``Year``, ``Precinct``,
            ``Board Disposition``, ``Unique Id`` and
            ``Num_NYPD_Officers_Year`` columns.
        start: First year to include (inclusive).
        stop: Last year to include (inclusive).
        figno: Figure label; capitalized for the title.
        ign_pcts: Precincts whose rows are excluded. Only read, never
            modified.

    Returns:
        Tuple of the per-year DataFrame (``Year``, ``Substantiated``,
        ``NYPD Officers``) and the statsmodels summary of the first OLS fit.
    """
    dfa = dfa[(dfa["Year"] >= start) & (dfa["Year"] <= stop)]
    dfa = dfa[~dfa["Precinct"].isin(ign_pcts)]

    # na=False: a missing disposition counts as "not substantiated". Without
    # it the mask contains NaN and the boolean indexing raises.
    substantiated = dfa["Board Disposition"].str.contains("Substantiated ", na=False)
    dfs = dfa[substantiated]

    g = dfs.groupby("Year")["Unique Id"].count().reset_index().rename(
        columns={"Unique Id": "Substantiated"})
    officer_cols = ["Year", "Num_NYPD_Officers_Year"]
    og = dfa.drop_duplicates(officer_cols)[officer_cols].sort_values(by="Year")
    g = pd.merge(g, og, on="Year")
    g = g.rename(columns={"Num_NYPD_Officers_Year": "NYPD Officers"})

    shapes = seaborn_conf_int(g, "NYPD Officers", "Substantiated")
    fig = px.scatter(g,
                     x=g["NYPD Officers"],
                     y=g.Substantiated,
                     color=g.Year,
                     text=g.Year,
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)
    fig.update_xaxes(
        title_text="<span style='font-size: 12px;'>Number of Sworn NYPD Officers</span>")
    fig.update_yaxes(
        title_text="<span style='font-size: 12px;'>Number of Substantiated Misconduct Complaints</span>"
    )
    fig.update_layout(
        title={
            'text': f"<b>Figure {figno.capitalize()}</b>: Number of Substantiated Misconduct Complaints vs. Number of Sworn NYPD Officers ({start}-{stop})",
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        })
    fig.show()

    results = px.get_trendline_results(fig)
    return g, results.px_fit_results.iloc[0].summary()
def updating_graph(feature1, feature2):
    """Build the scatter figure and a table figure of its OLS summary header.

    Reads the module-level ``df``.

    Args:
        feature1: Column of ``df`` plotted on the x axis.
        feature2: Column of ``df`` plotted on the y axis.

    Returns:
        Tuple ``(fig, fig3)``: the scatter plot with an OLS trendline, and a
        ``go.Table`` figure showing the first table of the regression
        summary, with numeric cells rounded to two decimals.
    """
    # Local import keeps this block self-contained.
    from io import StringIO

    fig = px.scatter(df, x=feature1, y=feature2, trendline="ols")
    results = px.get_trendline_results(fig)
    results_summary = results.px_fit_results.iloc[0].summary()

    # read_html expects a path or file-like object; passing literal HTML is
    # deprecated, so the markup is wrapped in StringIO.
    results_as_html = results_summary.tables[0].as_html()
    header_table = pd.read_html(StringIO(results_as_html))[0].round(2)

    # go.Table wants one list of cell values per column.
    vals = [list(header_table[col]) for col in header_table.columns]

    fig3 = go.Figure(data=[go.Table(cells=dict(values=vals))])
    fig3.update_layout(width=700, height=900)
    return fig, fig3
def get_fig_continent2(df):
    """Scatter total deaths vs. total cases per continent with OLS slopes.

    The last row of every (location, month) group is kept, deaths and cases
    are summed per (continent, month), and each continent's rounded trendline
    slope is written on the figure at a hard-coded position.

    Returns:
        The annotated Plotly Express figure.
    """
    latest = df.groupby(["location", "date_months"]).tail(1)
    totals = latest.groupby(["continent", "date_months"])[
        ["total_deaths", "total_cases"]].sum().reset_index()

    fig = px.scatter(totals,
                     x="total_cases",
                     y="total_deaths",
                     color="continent",
                     marginal_x="box",
                     trendline="ols",
                     template="simple_white")
    results = px.get_trendline_results(fig)

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
    text_positions = [(-2 * 10**6, 19000), (1.255 * 10**7, 224264),
                      (7 * 10**6, 307790), (9.85 * 10**6, 333600),
                      (-2 * 10**6, 1000), (9 * 10**6, 280000)]
    # params[1] is the slope of each fit, in result order.
    slopes = [round(fit.params[1], 3) for fit in results["px_fit_results"]]

    # zip stops at the shortest sequence, so surplus colours or positions
    # are ignored.
    for slope, color, (x_pos, y_pos) in zip(slopes, colors, text_positions):
        fig.add_annotation(
            dict(font=dict(color=color, size=12),
                 x=x_pos,
                 y=y_pos,
                 showarrow=False,
                 text=slope,
                 textangle=0,
                 xanchor='left',
                 xref="x",
                 yref="y"))
    return fig
def calc_predictions(x):
    """Extrapolate the OLS trend of ``Daily_Total`` fourteen steps ahead.

    Args:
        x: DataFrame with ``X_val``, ``Daily_Total`` and ``Date`` columns;
            the last row supplies the final x value and the base date.

    Returns:
        DataFrame with columns ``Dates`` (``YYYY-MM-DD`` strings for the 14
        days after the base date) and ``OLS Values`` (rounded predictions).
    """
    # Fit the OLS line through Plotly Express and read its parameters.
    fig = px.scatter(x, x="X_val", y="Daily_Total", trendline="ols")
    params = px.get_trendline_results(fig).px_fit_results.iloc[0].params
    intercept, slope = params[0], params[1]

    # The 14 x positions that follow the last observed one.
    first_future = x.X_val.iloc[-1] + 1
    future_positions = range(first_future, first_future + 14)
    predictions = [round((slope * pos) + intercept, 0)
                   for pos in future_positions]

    # 15 periods starting at the base date; drop the base date itself.
    base = x.Date.iloc[-1]
    future_days = pd.date_range(start=str(base), periods=15)[1:].tolist()
    day_labels = [day.strftime('%Y-%m-%d') for day in future_days]

    rows = list(zip(day_labels, predictions))
    return pd.DataFrame(rows, columns=('Dates', 'OLS Values'))
def plot_trendline_axis_known(x_axis: Tuple[str, List[float]],
                              y_axis: Tuple[str, List[float]],
                              show_plot: bool = True) -> List[float]:
    """Function to give a general trend of the input values

    Each axis is a ``(label, values)`` pair. The values are plotted with an
    OLS trendline (box marginal on x, violin marginal on y) and the fitted
    parameters are returned as builtin floats, constant term first.

    >>> x_axis_data = [1.0, 2.0, 3.0, 4.0, 5.0]
    >>> y_axis_data = [2.0, 4.0, 6.0, 8.0, 10.0]
    >>> plot_trendline_axis_known(('x-axis', x_axis_data), ('y-axis', y_axis_data), False)
    [-3.1086244689504383e-15, 2.0000000000000004]
    """
    x_label, x_values = x_axis[0], x_axis[1]
    y_label, y_values = y_axis[0], y_axis[1]
    df = pd.DataFrame({x_label: x_values, y_label: y_values})

    fig = px.scatter(df,
                     x=x_label,
                     y=y_label,
                     marginal_x="box",
                     marginal_y="violin",
                     trendline="ols")
    if show_plot:  # default is to display plot
        fig.show()

    # get results of linear regression and return
    results = px.get_trendline_results(fig)
    fit = results.iloc[0]["px_fit_results"]
    # Convert NumPy scalars to builtin floats so the return value matches the
    # annotation and prints as documented regardless of NumPy's scalar repr.
    return [float(param) for param in fit.params]
st.subheader("Relación entre Precio (US$) y Superficie (m²)") #Create scatter plot (filtered by zone) fig_scatter = px.scatter(data_stat, x='Surface', y='Price_USD', trendline="ols", color='Price_m2_USD', labels=dict(Surface="Superficie en m²", Price_USD="Precio en US$", Price_m2_USD="Precio por m² (US$)")) st.plotly_chart(fig_scatter, use_container_width=True ) #write the figure in the web app and make it responsive #Get results from the linear regression results = px.get_trendline_results(fig_scatter) results_summary = results.px_fit_results.iloc[0].summary() #Note that tables is a list. The table at index 1 is the "core" table. Additionally, read_html puts dfs in a list, so we want index 0 #Credit to: https://stackoverflow.com/questions/51734180/converting-statsmodels-summary-object-to-pandas-dataframe/52976810 results_as_html = results_summary.tables[0].as_html() reg_results = pd.read_html(results_as_html, header=None, index_col=0)[0] #Read as df r_squared = reg_results.loc['Dep. Variable:'][3] #Extract R-Squared st.write( '<html lang="es"><html translate="no">', "En función del modelo desplegado en el gráfico de dispersión, se puede notar que para la", selected_zone_stat, ", el", "{:.0%}".format(r_squared), "de la varianza en el precio puede ser predicha basándose en la cantidad de m² de la propiedad.",
dict( xref='paper', yref='paper', x=0.5, y=-0.22, xanchor='center', yanchor='top', font=dict(family='Arial', size=12, color=color_footer), showarrow=False, text= 'twitter.com/vivekparasharr | github.com/vivekparasharr | vivekparasharr.medium.com' )) fig.update_layout(template="plotly_dark") fig.show() ################################################################################ # Plotted using plotly express import plotly.express as px fig = px.scatter( x=df2[df2.Code == 'CHL'][df2.Year <= 2002].Daily.values, y=df[df.Code == 'CHL'][df.Year >= 1980].Total.values, error_y_minus=df[df.Code == 'CHL'][df.Year >= 1980].Total.values, trendline="ols") fig.show() results = px.get_trendline_results(fig) print(results) #results.query("sex == 'Male' and smoker == 'Yes'").px_fit_results.iloc[0].summary() results.px_fit_results.iloc[0].summary()
def create_xyplot(clickData, predictand, predictor, fc_time, bdnc, info):
    """Scatter the predictand against the original and fitted predictor.

    Three NetCDF datasets under ``bdnc`` are opened, reduced to the grid
    point nearest the clicked location and to the month taken from
    ``fc_time``, merged into one table and plotted with one OLS trendline per
    predictor variant. Intermediate data is printed for debugging.

    Args:
        clickData: Click payload with ``points[0]['x']`` (lon) and
            ``points[0]['y']`` (lat); ``info['clickData']`` is used when None.
        predictand: Key into ``info['variables_prad']``.
        predictor: Key into ``info['variables_pred']``.
        fc_time: String whose characters from index 5 onward are the month
            number (presumably ``'YYYY-MM'`` -- TODO confirm).
        bdnc: Path prefix of the NetCDF files.
        info: Dict with ``clickData``, ``variables_prad`` and
            ``variables_pred`` entries.

    Returns:
        The Plotly figure.
    """
    # np.int was only an alias of the builtin and has been removed from NumPy.
    mo = int(fc_time[5:])
    if clickData is None:
        clickData = info['clickData']
    lat_click = clickData['points'][0]['y']
    lon_click = clickData['points'][0]['x']
    predictand = info['variables_prad'][predictand]
    predictor = info['variables_pred'][predictor]

    month_tag = str(mo).zfill(2)
    pred = xr.open_dataset(bdnc + 'predodata_3m_nc_' + predictand + '_' +
                           month_tag + '.nc')
    predfit = xr.open_dataset(bdnc + 'predodata_3m_fit_' + predictand + '_' +
                              month_tag + '.nc')
    if predictor == 'CO2':
        prad = xr.open_dataset(bdnc + 'predadata_v2_' + predictand + '.nc')
    else:
        prad = xr.open_dataset(bdnc + 'predadata_3m_nc_' + predictand + '_' +
                               month_tag + '.nc')

    # Select right location and time slice
    try:
        # Predictor with lat/lon dimensions: pick the nearest grid point.
        pred1d = pred[predictor].sel(
            lon=lon_click, lat=lat_click,
            method=str('nearest')).sel(time=(pred['time.month'] == mo))
        pred1d_fit = predfit[predictor].sel(
            lon=lon_click, lat=lat_click,
            method=str('nearest')).sel(time=(pred['time.month'] == mo))
    except ValueError:
        # Fall back to a predictor without spatial dimensions (time only).
        pred1d = pred[predictor].sel(time=(pred['time.month'] == mo))
        pred1d_fit = predfit[predictor].sel(time=(pred['time.month'] == mo))

    prad1d = prad.sel(
        lon=lon_click, lat=lat_click,
        method=str('nearest')).sel(time=(prad['time.month'] == mo))
    print('prad1d', prad1d)
    print('pred1d', pred1d)
    print('pred1d_fit', pred1d_fit)

    data_orig = xr.merge([
        prad1d.to_array(name='predictand').squeeze(),
        pred1d.rename('orig'),
        pred1d_fit.rename('fit')
    ]).to_dataframe()
    print(data_orig)

    # Long format so plotly express can colour by predictor variant.
    data_melt_orig = data_orig.dropna().melt(id_vars='predictand',
                                             value_vars=['orig', 'fit'])
    print(data_melt_orig)

    fig = px.scatter(data_melt_orig,
                     x='value',
                     y='predictand',
                     color='variable',
                     trendline='ols')
    fig.data[-1].name = 'Diner'
    fig.data[-1].showlegend = True
    results = px.get_trendline_results(fig)
    print(results.iloc[0])

    fig.update_layout(legend=go.layout.Legend(
        traceorder="normal",
        font=dict(family="sans-serif", size=12, color="black"),
        bordercolor="Black",
        borderwidth=2))
    fig.update_layout(
        go.Layout(
            title='Correlation between burned area and observed and forecasted MDC (lat='
            + str(lat_click) + ', lon=' + str(lon_click) + ')',
            autosize=False,
            height=500,
        ))
    fig.update_yaxes(title_text=predictand)
    fig.update_xaxes(title_text=predictor)

    print(' ')
    # The message previously named a different function.
    print('>>> Finished create_xyplot <<<')
    print(' ')
    return (fig)
) st.plotly_chart(fig_map) st.subheader('Population v/s Hospital Count') st.text('''Scatter plot with trendline. See below graph for correlation coefficient if more than one state selected''') fig_scatter = px.scatter(df[df['ST'].isin(states_choice)], x="Population", y="Hospital Count", text="ST", trendline="ols") st.plotly_chart(fig_scatter) if len(states_choice) > 1: r2_value = px.get_trendline_results( fig_scatter).px_fit_results.iloc[0].rsquared st.text(f'Correlation Coefficient (R^2 value): {r2_value:.5f}') st.subheader('Hospital Count by State') st.text('Bar chart showing number of Hospitals by State') fig_bar = px.bar( df[df['ST'].isin(states_choice)], x='ST', y='Hospital Count', text='Hospital Count', labels={'ST': 'State'}, ) st.plotly_chart(fig_bar)
def __call__(
    self,
    screen_object: Any,
    mode: str = 'pointmutant',
    show_results: bool = False,
    replicate: int = -1,
    output_html: Union[None, str, Path] = None,
    **kwargs: Any,
) -> None:
    """
    Generate a scatter plot between object and a second object of the
    same class.

    Parameters
    ----------
    screen_object : object from class *Screen* to do the scatter with

    mode : str, default 'pointmutant'.
        Alternative set to "mean" for the mean of each position.

    show_results : boolean, default False
        If set to true, will print the details of the linear fit.

    replicate : int, default -1
        Set the replicate to plot. By default, the mean is plotted.
        First replicate start with index 0.
        If there is only one replicate, then leave this parameter
        untouched.

    output_html : str, default None
        If you want to export the generated graph into html, add the
        path and name of the file. Example: 'path/filename.html'.

    **kwargs : other keyword arguments

    Raises
    ------
    ValueError
        If ``mode`` is neither 'pointmutant' nor 'mean'.
    """
    temp_kwargs: Dict[str, Any] = self._update_kwargs(kwargs)

    # Reject unknown modes up front instead of plotting a stale (or missing)
    # self.df_output.
    if mode not in ('pointmutant', 'mean'):
        raise ValueError(
            f"mode must be 'pointmutant' or 'mean', got {mode!r}")

    own_df = self.dataframes.df_notstopcodons[replicate]
    other_df = screen_object.dataframes.df_notstopcodons[replicate]
    if mode == 'pointmutant':
        self.df_output = process_by_pointmutant(own_df, other_df)
    else:
        self.df_output = process_mean_residue(own_df, other_df)
        self.df_output['Variant'] = self.df_output['Position']

    # create figure
    self.fig = px.scatter(
        x=self.df_output['dataset_1'],
        y=self.df_output['dataset_2'],
        trendline="ols",
        trendline_color_override="red",
    )

    self._tune_plot(temp_kwargs)
    self._save_html(output_html, temp_kwargs)

    if show_results:
        # The summary used to be computed and discarded; print it so the
        # flag has a visible effect.
        fit = px.get_trendline_results(self.fig).px_fit_results.iloc[0]
        print(fit.summary())
def create_cor_fires(clickData, base_time, valid_time, area_size, variable='MDC (from TP)'):
    """Scatter burned area against the observed and forecast variable.

    MODIS burned area and the forecast dataset are read from ``bdnc``,
    reduced to the clicked region (or nearest grid point when the local
    ``REGION`` switch is off), merged and plotted with one OLS trendline per
    series. Relies on the module-level ``clickData_start``, ``base_times``,
    ``valid_times``, ``area_sizes``, ``variables`` and ``bdnc``.

    Args:
        clickData: Click payload with ``points[0]['x']`` (lon) and
            ``points[0]['y']`` (lat); ``clickData_start`` is used when None.
        base_time: Key into ``base_times``.
        valid_time: Key into ``valid_times``; the looked-up value is used
            both as lead time and as the month filter for the MODIS data.
        area_size: Key into ``area_sizes`` (edge length of the region box).
        variable: Key into ``variables`` selecting the forecast file.

    Returns:
        The Plotly figure.
    """
    print(' ')
    print('>>> Starting create_cor_fires <<<')
    print(' ')
    PRINT = False
    if clickData is None:
        clickData = clickData_start

    st = base_times[base_time]
    lt = valid_times[valid_time]
    name2 = str(st) + '.nc'

    lat_click = clickData['points'][0]['y']
    lon_click = clickData['points'][0]['x']
    # Box around the click; latitude bounds are given from high to low.
    la1 = lat_click + 0.1 + area_sizes[area_size] / 2
    la2 = lat_click - 0.1 - area_sizes[area_size] / 2
    lo1 = lon_click - 0.1 - area_sizes[area_size] / 2
    lo2 = lon_click + 0.1 + area_sizes[area_size] / 2

    REGION = True
    if REGION:
        modis = xr.open_dataset(bdnc + '2001-2018-MODIS_BA_r10.nc').sel(
            lon=slice(lo1, lo2), lat=slice(la1, la2)).sum(dim=['lat', 'lon'])
        modis_m = modis.sel(time=modis['time.month'] == lt)
        print(modis)
        print(modis_m)
        pred = xr.open_dataset(
            bdnc + 'pred_v2_' + variables[variable] + '_' + name2).sel(
                lon=slice(lo1, lo2), lat=slice(la1, la2), leadtime=lt,
                time=slice('2001', '2019'))
        pred1d = pred.kprep.mean(dim=['lat', 'lon', 'ens'])
        obs1d = pred.obs.mean(dim=['lat', 'lon'])
    else:
        modis = xr.open_dataset(bdnc + '2001-2018-MODIS_BA_r10.nc').sel(
            lon=lon_click, lat=lat_click, method='nearest')
        modis_m = modis.sel(time=modis['time.month'] == lt).load()
        pred = xr.open_dataset(
            bdnc + 'pred_v2_' + variables[variable] + '_' + name2).sel(
                lon=lon_click, lat=lat_click, method=str('nearest')).sel(
                    leadtime=lt, time=slice('2001', '2019'))
        pred1d = pred.kprep.mean(dim='ens')
        obs1d = pred.obs

    if PRINT:
        print('create_cor_fires - modis time', modis_m.time)
    # Correct time, was base time iso valid time
    pred1d = pred1d.assign_coords(time=modis_m.time)
    obs1d = obs1d.assign_coords(time=modis_m.time)
    if PRINT:
        print('create_cor_fires - modis burned area', modis_m['burned_area'])
    if PRINT:
        print('create_cor_fires - krpep values', pred1d)

    data = xr.merge([modis_m / 1.e6, obs1d, pred1d]).drop('leadtime').to_dataframe()

    # Rewrite data in order to use plotly express
    data_melt = data.dropna().melt(id_vars='burned_area',
                                   value_vars=['obs', 'kprep'])
    fig = px.scatter(data_melt, x='value', y='burned_area',
                     color='variable', trendline='ols')
    fig.data[-1].name = 'Diner'
    fig.data[-1].showlegend = True
    results = px.get_trendline_results(fig)
    print(results.iloc[0])

    fig.update_layout(
        legend=go.layout.Legend(
            traceorder="normal",
            font=dict(
                family="sans-serif",
                size=12,
                color="black"
            ),
            bordercolor="Black",
            borderwidth=2
        )
    )
    fig.update_layout(go.Layout(
        title='Correlation between burned area and observed and forecasted MDC (lat='
        + str(lat_click) + ', lon=' + str(lon_click) + ')',
        autosize=False,
        height=500,
    ))
    fig.update_yaxes(title_text="Burned Area [km2]")
    fig.update_xaxes(title_text="MDC [-]")

    print(' ')
    # Matches the "Starting" line; the message previously named another
    # function.
    print('>>> Finished create_cor_fires <<<')
    print(' ')
    return (fig)