def scatter_dist_by_mappings(dataset, x_kdims, y_kdims, mappings, selection_dim="Gene",
                             datashade_=False, dynspread_=False, ):
    """Scatter plot with adjoined marginal densities, one overlay layer per mapping.

    For every ``name -> members`` entry in ``mappings`` the dataset is
    sub-selected along ``selection_dim``, converted to a dataframe restricted
    to the two plotted variables and wrapped in ``hv.Points``.  A kernel
    density estimate is computed for each group along both variables, and the
    per-group elements are combined into ``hv.NdOverlay`` containers.

    Parameters
    ----------
    dataset
        Object supporting ``.sel({dim: values})`` and ``[[a, b]].to_dataframe()``.
    x_kdims, y_kdims
        Names of the variables used as the two key dimensions of the points.
    mappings
        Mapping of group name to the values selected along ``selection_dim``.
    selection_dim
        Name of the dimension passed to ``dataset.sel``.
    datashade_
        When true, the points overlay is passed through ``datashade``.
    dynspread_
        When true, ``dynspread`` is applied to the datashaded overlay.  As in
        ``hv_scatter_dist``, requesting it without ``datashade_`` emits a
        warning and enables both.

    Returns
    -------
    The points overlay adjoined (``<<``) with the two density overlays.
    """
    import warnings

    # dynspread operates on the datashaded output; previously this flag was
    # silently ignored when datashade_ was false.
    if dynspread_ and not datashade_:
        warnings.warn("Dynspread can only be used with datashade, setting both to true.")
        datashade_ = True

    data_groups = {name: dataset.sel({selection_dim: genes}) for name, genes in mappings.items()}
    data_group_dfs = {k: v[[x_kdims, y_kdims]].to_dataframe() for k, v in data_groups.items()}

    points = {k: hv.Points(val, kdims=[x_kdims, y_kdims]) for k, val in data_group_dfs.items()}

    # Naming follows the original: ``dist_x`` is the density over ``y_kdims``
    # and is adjoined first; ``dist_y`` is the density over ``x_kdims``.
    dist_x = {k: univariate_kde(hv.Distribution(p, kdims=[y_kdims], group="dist_x"), n_samples=1000)
              for k, p in points.items()}
    dist_y = {k: univariate_kde(hv.Distribution(p, kdims=[x_kdims], group="dist_y"), n_samples=1000)
              for k, p in points.items()}

    if datashade_:
        points_overlay = datashade(hv.NdOverlay(points))
        if dynspread_:
            points_overlay = dynspread(points_overlay)
    else:
        points_overlay = hv.NdOverlay(points)

    return points_overlay << hv.NdOverlay(dist_x) << hv.NdOverlay(dist_y)
def model_comp(models):
    """Build one sensitivity/precision density overlay per classifier.

    For each model, ``predict_proba`` is evaluated on the module-level
    ``X_test``; recall and precision against the module-level ``y_test`` are
    computed at 100 evenly spaced thresholds in ``[0, 1]``.  The two resulting
    score lists are each wrapped in ``hv.Distribution`` and overlaid, and the
    overlay's x-axis label is set to the model's display name.

    Parameters
    ----------
    models
        Iterable of fitted classifiers exposing ``predict_proba``.  Models are
        paired positionally with the four built-in labels; anything beyond the
        fourth model is ignored.

    Returns
    -------
    tuple
        One overlay per labelled model (a 4-tuple when four models are given).
    """
    labels = ['Gradient Boosting: ', 'Logistic Regression', 'Naive Bayes', 'Random Forest']
    thresholds = np.linspace(0, 1, 100)

    overlays = []
    # zip() replaces the former hard-coded range(4), so fewer than four models
    # no longer raises IndexError.
    for label, clf in zip(labels, models):
        # NOTE(review): column 0 of predict_proba is used as the score; for
        # scikit-learn classifiers this is the probability of the first class
        # in ``clf.classes_`` -- confirm this is the intended positive class.
        scores = clf.predict_proba(X_test)[:, 0]
        sens = [recall_score(y_test, scores >= t) for t in thresholds]
        prec = [precision_score(y_test, scores >= t) for t in thresholds]
        overlay = hv.Distribution(sens, label='Sensitivity') * hv.Distribution(prec, label='Precision')
        overlays.append(overlay.opts(xlabel=label))

    return tuple(overlays)
def modify_doc(doc):
    """Populate a Bokeh document with a scatter plot and adjoined marginal densities.

    Two random point clouds are generated, overlaid, and adjoined with the
    overlaid densities of both clouds along ``y`` (width 125) and ``x``
    (height 125).  The composition is rendered with the module-level
    ``renderer`` and its Bokeh state is added to ``doc`` inside a ``row``.
    """
    points = hv.Points(np.random.randn(100, 2))
    points2 = hv.Points(np.random.randn(100, 2) * 2 + 1)

    # One overlaid pair of densities per axis, both clamped to (-5, 5).
    marginals = []
    for axis_name in 'xy':
        pair = hv.Distribution(points2, kdims=[axis_name]) * hv.Distribution(points, kdims=[axis_name])
        marginals.append(pair.redim.range(x=(-5, 5), y=(-5, 5)))
    xdist, ydist = marginals

    scatter = points2 * points
    composition = scatter << ydist.opts(width=125) << xdist.opts(height=125)

    rendered = renderer.get_plot(composition)
    doc.add_root(row(rendered.state))
def kde(self):
    """Return an ``hv.Distribution`` of ``self.data`` with this plot's options applied.

    Plot options are copied from ``self._plot_opts``; ``invert_axes`` is set
    when ``self.kwds['orientation']`` equals ``'horizontal'``.  The style
    ``alpha`` comes from ``self.kwds`` and defaults to 1.
    """
    plot_options = dict(self._plot_opts)
    is_horizontal = self.kwds.get('orientation') == 'horizontal'
    plot_options['invert_axes'] = is_horizontal

    style_options = dict(alpha=self.kwds.get('alpha', 1))

    element = hv.Distribution(self.data.to_frame(), self.data.name)
    return element.opts(plot=plot_options, style=style_options)
def create_density_plots(df, density, kdims, cmap):
    """Create one overlay of density plots per key dimension.

    Parameters
    ----------
    df
        Dataframe holding the columns named in ``kdims`` and, for grouped
        densities, a column ``'z'`` with the group labels.
    density
        ``'all'`` for a single density over the whole dataframe, or
        ``'group'`` for one density per category of column ``'z'``.  With
        ``'group'``, a warning is issued and a single density is drawn when
        ``'z'`` is missing or not categorical.
    kdims
        Dimensions to plot; the code assumes x, y order.
    cmap
        Mapping from group key to color, used only for grouped densities.
        Keys without an entry are drawn in black.

    Returns
    -------
    list
        One ``hv.Overlay`` of ``hv.Distribution`` elements per entry of ``kdims``.

    Raises
    ------
    ValueError
        If ``density`` is neither ``'all'`` nor ``'group'``.
    """
    cm = {}
    if density == 'all':
        dfs = {_sentinel: df}
    elif density == 'group':
        if 'z' not in df.columns:
            warnings.warn(
                "`density='group'` was specified, but no group found. Did you specify `color=...`?"
            )
            dfs = {_sentinel: df}
        elif not is_categorical(df['z']):
            # The message previously interpolated an undefined name
            # (`condition`), which raised NameError instead of warning.
            warnings.warn(
                "`density='group'` was specified, but column `z` is not categorical."
            )
            dfs = {_sentinel: df}
        else:
            dfs = {k: v for k, v in df.groupby('z')}
            cm = cmap
    else:
        raise ValueError(
            f'Invalid `density` type: \'`{density}`\'. Possible values are `\'all\'`, `\'group\'`.'
        )

    # assumes x, y order in kdims
    return [
        hv.Overlay([
            hv.Distribution(group_df, kdims=dim).opts(color=cm.get(key, 'black'), framewise=True)
            for key, group_df in dfs.items()
        ])
        for dim in kdims
    ]
def select_widget(num_var):
    """Widget callback: build a styled ``hv.Distribution`` chart for one column.

    ``num_var`` is the column of the module-level ``dft`` chosen in the
    widget.  The next color is drawn from the module-level ``colors``
    iterator; size and transparency come from module-level settings.
    Returns the HoloViews chart.
    """
    color = next(colors)

    # NOTE(review): the element is built from the output of np.histogram
    # (counts and bin edges) rather than from the raw column values --
    # confirm this is intended for a KDE plot.
    binned = np.histogram(dft[num_var])
    chart = hv.Distribution(binned, num_var)

    return chart.opts(
        color=color,
        height=height_size,
        width=width_size,
        alpha=transparent,
        title='KDE (Distribution) Plot of Numeric Variables',
    )
def show(df_x, df_y, mlp, xs, ys):
    """Print the model score and return a Panel column of diagnostic plots.

    ``xs`` and ``ys`` transform the inputs and targets.  The column holds a
    row with a target-vs-prediction scatter and a density of the differences
    (prediction minus target), followed by prediction and target curves
    plotted against ``df_y.index``.
    """
    target = np.ravel(ys.transform(df_y))
    predicted = mlp.predict(xs.transform(df_x))
    score = mlp.score(xs.transform(df_x), target)
    print('Score: ', score)

    scatter = hv.Scatter((target, predicted)).opts(aspect='square')
    residual_density = hv.Distribution(predicted - target).opts(aspect='square')
    diagnostics = pn.Row(scatter, residual_density)

    prediction_curve = hv.Curve((df_y.index, predicted), label='prediction')
    # The width option is applied to the target curve only, as before.
    target_curve = hv.Curve((df_y.index, target), label='target').opts(width=800)

    return pn.Column(diagnostics, prediction_curve * target_curve)
def hv_scatter_dist(dataset, x_kdims, y_kdims, datashade_=False, dynspread_=False):
    """Scatter plot of two variables with adjoined kernel-density marginals.

    The two variables are pulled from ``dataset`` into a dataframe and
    wrapped in ``hv.Points``.  Densities over ``y_kdims`` and ``x_kdims`` are
    computed with ``univariate_kde`` (1000 samples each) and adjoined to the
    points, which may optionally be datashaded and dynspread.  Asking for
    ``dynspread_`` without ``datashade_`` warns and enables both.
    """
    wants_spread_only = dynspread_ and not datashade_
    if wants_spread_only:
        warnings.warn("Dynspread can only be used with datashade, setting both to true.")
        datashade_ = True

    frame = dataset[[x_kdims, y_kdims]].to_dataframe()
    scatter = hv.Points(frame, kdims=[x_kdims, y_kdims])

    # Marginals are computed from the raw points, before any rasterisation.
    density_over_y = univariate_kde(
        hv.Distribution(scatter, kdims=[y_kdims], group="dist_x"), n_samples=1000)
    density_over_x = univariate_kde(
        hv.Distribution(scatter, kdims=[x_kdims], group="dist_y"), n_samples=1000)

    main = scatter
    if datashade_:
        main = datashade(main)
    if dynspread_:
        main = dynspread(main)

    return main << density_over_y << density_over_x
def select_widget(Select_numeric_variable):
    """Widget callback: overlay one ``hv.Distribution`` per class for a column.

    ``Select_numeric_variable`` is the column of the module-level ``dft``
    chosen in the widget.  For every entry of the module-level
    ``target_vars``, the rows where ``dft[dep]`` equals that entry are
    selected and a distribution element is built from them.  Returns the
    styled ``hv.NdOverlay``.
    """
    # Advances the shared color iterator; the drawn value itself is not used.
    next(colors)

    per_class = {}
    for group in target_vars:
        class_values = dft[dft[dep] == group][Select_numeric_variable].values
        per_class[group] = hv.Distribution(np.histogram(class_values))

    chart = hv.NdOverlay(per_class)
    chart = chart.opts(opts.Distribution(alpha=0.5, height=height_size, width=width_size))
    chart = chart.opts(title='KDE (Distribution) Plots of all Numeric Variables by Classes')
    chart = chart.opts(xlabel='%s' % dep)
    chart = chart.opts(ylabel='%s' % Select_numeric_variable)
    return chart
# %% ## Plot data inset map # Create data subset (for map plotting efficiency) res_subset = gdf_traces.sample(2500) trace_plt = gv.Points(res_subset, crs=ANT_proj).opts(projection=ANT_proj, color='red') Ant_bnds * trace_plt #%% one_to_one = hv.Curve(data=pd.DataFrame({'x': [100, 600], 'y': [100, 600]})) scatt_accum = hv.Points(data=pd.DataFrame(gdf_traces), kdims=['accum2011', 'accum2016'], vdims=[]) dist_2011 = hv.Distribution(data=scatt_accum, kdims=['accum2011']) dist_2016 = hv.Distribution(data=scatt_accum, kdims=['accum2016']) accum_dist = hv.Distribution(data=gdf_traces.accum_res) (hv.Layout((one_to_one.opts(color='black') * scatt_accum.opts( xlim=(100, 600), ylim=(100, 600), xlabel='accum2011', ylabel='accum2016') ) << dist_2016.opts(width=130, xlim=( 100, 600)) << dist_2011.opts(height=130, xlim=(100, 600))) + accum_dist) #%% # Plot spatial distribution of mean accum residual accum_plt = gv.Points(res_subset, vdims=['accum_res', 'accum2011', 'accum2016'], crs=ANT_proj).opts(projection=ANT_proj, color='accum_res',
import cProfile
import line_profiler
import holoviews as hv
import numpy as np
from holoviews import opts
from guess import RcaHmm

# Notebook-style script: fit an RcaHmm to the 1995 RCA matrix and compare the
# decoded states against a reference state file.
hv.extension("bokeh", "matplotlib")
opts.defaults(opts.Distribution(width=650))

# Load the series and build a model with 4 as the second constructor argument
# (presumably the number of hidden states -- confirm against RcaHmm).
series = np.genfromtxt("data/1995/RCAmatrix1995.txt")
model = RcaHmm(series, 4)

# Density of the log of the non-zero entries (cell output).
hv.Distribution(np.log(series[series.nonzero()]))

# %%
# Run Baum-Welch on the series and plot the values stored in ``model.lk``.
model.baum_welch(series, 6)
hv.Curve(model.lk, "iterations", "likelihood")

# %%
# Percentage of entries whose decoded state matches the reference states.
# Note the naming: ``right_states`` is the agreement of ``model.viterbi`` and
# ``right_viterbi`` is the agreement of ``model.states``; the printed labels
# below pair them as "Viterbi" and "gamma argmax" respectively.
gen_states = np.genfromtxt("gen_param/states.txt")
right_states = 100 * np.count_nonzero(
    gen_states == model.viterbi(series)) / series.size
right_viterbi = 100 * np.count_nonzero(
    gen_states == model.states(series)) / series.size
print(f"Right states with Viterbi: {right_states:.2f}%\n"
      f"Right states with gamma argmax: {right_viterbi:.2f}%")
[[tabla_resumen, plot_p, plot_EUR], [plot_alta, plot_media, plot_baja]], toolbar_options=dict(logo=None)) #Lists Of Rows Layout show(layout) #Se abré en navegador output_file("output_pozos_tipo.html", title="Pozos Tipo") ####### Holoviews import pandas as pd import holoviews as hv from holoviews import opts from holoviews.operation.stats import univariate_kde hv.extension('bokeh') dist = hv.Distribution(serie_resumen.dias_perforacion, label='Dias de perforacion - Función de Probabilidad') hist = serie_resumen.dias_perforacion.dropna() hist = np.histogram(hist) plot_hist = hv.Histogram(hist) #kde = univariate_kde(dist, # bin_range=(0, serie_resumen.dias_perforacion.max()), # bw_method='scott', # n_samples=1000) #kde scatter = hv.Scatter(serie_resumen, kdims=['dias_perforacion', 'profundidad_total'], label='Dias de perforacion vs Profundidad total')
# %%opts Curve [width=500 height=300]
hv.DynamicMap(simulation_plots, kdims=['days', 'runs']).redim.range(
    days=(100, 500), runs=(5, 15)).options(width=900, height=400)


def simulation_prices(days=100, runs=1000, axis=0):
    """Simulate ``runs`` independent price series of ``days`` samples each.

    Every run uses a fresh ``Accounts`` instance and records the first item
    of ``price()`` once per day.  The per-run frames are concatenated along
    ``axis`` and the resulting columns are renamed ``Run 1``, ``Run 2``, ...
    """
    def one_run():
        account = Accounts()
        samples = [account.price()[0] for _ in range(days)]
        return pd.DataFrame(samples, columns=['return'])

    frames = [one_run() for _ in range(runs)]
    combined = pd.concat(frames, axis=axis)
    n_columns = combined.shape[1]
    combined.columns = [f'Run {number}' for number in range(1, n_columns + 1)]
    return combined


simulations = simulation_prices()

# %%opts Overlay [show_title=True] Distribution [height=500, width=1000]
# Compare a normal sample drawn with the simulations' mean and standard
# deviation against the first simulated column (drawn without fill).
hv.Distribution(
    np.random.normal(simulations.mean(), simulations.std(), 100000),
    label='Normal') * hv.Distribution(
        simulations.iloc[:, 0], label='Simulation').options(fill_alpha=0.0)
# Plots of mean annual accumulation for 2011 and 2016 (supporting plot to accum residual plot above). # %% # Spatial distribution in trend residuals trends_plt = gv.Points( res_subset, vdims=['trend_res'], crs=ANT_proj).opts( projection=ANT_proj, color='trend_res', cmap='coolwarm', symmetric=True, colorbar=True, clabel='mm/yr^2', tools=['hover'], width=750, height=500) trends_plt.relabel('Linear trend residuals') #%%[markdown] # Plot showing the spatial distribution in trend residuals between 2011 and 2016. # #%% trends_hist = hv.Distribution( gdf_traces.trend_res).opts(xlabel='Linear trend (mm/yr^2)') trends_hist.relabel('Distribution in trend residuals') #%% t_plt2011 = gv.Points( res_subset, vdims=['trend2011', '2011 lb', '2011 ub'], crs=ANT_proj). opts( projection=ANT_proj, color='trend2011', cmap='coolwarm', symmetric=True, colorbar=True, clabel='mm/yr^2', tools=['hover'], width=450, height=300) t_plt2016 = gv.Points( res_subset, vdims=['trend2016', '2016 lb', '2016 ub'], crs=ANT_proj). opts( projection=ANT_proj, color='trend2016',
d.sort_values('time', inplace=True) d['norm'] = (d['corrected_intensity'].values - d['corrected_intensity'].values.min()) / ( d['corrected_intensity'].values.max() - d['corrected_intensity'].values.min()) mean_norm = (mean_bleach['corrected_intensity'].values - mean_bleach['corrected_intensity'].min()) / ( mean_bleach['corrected_intensity'].max() - mean_bleach['corrected_intensity'].min()) d['resid'] = (d['norm'].values - mean_norm)**2 * d.iloc[0]['corrected_intensity'] dfs.append(d) norm_df = pd.concat(dfs) norm_df.to_csv('output/{}_{}C_{}_{}_bleaching.csv'.format( DATE, TEMP, CARBON, OPERATOR)) # %% bins = np.linspace(0, 1, 5) norm_df['bins'] = pd.cut(norm_df['norm'], bins) grouped = pd.DataFrame(norm_df.groupby('bins').median()).reset_index() grouped.dropna(inplace=True) alpha = 2 * 6 * np.trapz(grouped['resid'], grouped['norm']) hv.Curve(grouped, ['norm'], ['resid']) norm_df[norm_df['time'] == 0]['corrected_intensity'].values / alpha bs = mwc.stats.fast_bootstrap(norm_df, 5, iter=1E4) hv.Distribution(bs) # traces = hv.Curve(norm_df, ['time', 'cell_id'], ['norm']).groupby('cell_id').options(alpha=0.05, color='slategray').overlay() # mean = hv.Curve((np.arange(0, len(mean_norm), 1), mean_norm)).options(color='firebrick') # traces * mean
np.log(country_series[country_series.nonzero()].min()), np.log(country_series.max()), 250, ) occ = (np.count_nonzero( np.bitwise_and( country_states == np.arange(4)[:, np.newaxis, np.newaxis], country_series != 0), axis=1) / country_series.shape[0]) # yapf: disable occ /= occ.sum(0) dists = norm.pdf(support[:, np.newaxis], loc=country_model.means, scale=country_model.devs)[..., np.newaxis] * occ agg_plot = hv.HoloMap( { t + 1995: hv.Distribution( np.log(country_series[country_series[:, t].nonzero()[0], t])) * hv.Overlay([hv.Curve((support, dists[:, i, t])) for i in range(4)]) for t in range(country_series.shape[1]) }, "year", ).redim(Density="density", Value="log(RCA)") agg_plot # %% save_country_model(country, country_model, country_states)
def plot_perforacion():
    """Build, display and save the drilling-time overview layout.

    Reads the module-level ``serie_resumen`` and combines three elements:
    a table of ``dias_perforacion``, ``Qi_hist`` and ``estado_actual``; a
    density of ``dias_perforacion``; and a scatter of ``dias_perforacion``
    against ``profundidad_total``.  The layout is styled, sent to
    ``hv.output`` with the bokeh backend and saved to ``curvas_tipo.html``.

    Returns
    -------
    None
    """
    elementos_perforacion = serie_resumen[['dias_perforacion', 'Qi_hist', 'estado_actual']]

    tabla_perforacion = hv.Table(elementos_perforacion, 'pozo')
    tabla_perforacion.opts(height=500, width=400, fontscale=20)

    dist = hv.Distribution(serie_resumen.dias_perforacion,
                           label='Dias de perforacion - Función de Probabilidad')

    scatter = hv.Scatter(serie_resumen,
                         kdims=['dias_perforacion', 'profundidad_total'],
                         label='Dias de perforacion vs Profundidad total')
    scatter = scatter.redim.label(dias_perforacion='Dias de perforacion',
                                  profundidad_total='Profundidad total')

    tiempos = tabla_perforacion + dist + scatter
    tiempos.opts(
        opts.Distribution(height=500, width=700, xaxis=True,
                          xlabel='Dias de Perforacion',
                          xlim=(0, serie_resumen.dias_perforacion.max()),
                          line_width=1.00, color='grey', alpha=0.5,
                          fontscale=1.5, tools=['hover']),
        # Marker size scales with Qi_hist; color encodes estado_actual.
        opts.Scatter(height=500, width=700, xaxis=True, yaxis=True,
                     size=dim('Qi_hist') * 50, line_width=0.25,
                     color='estado_actual', cmap='Set1',
                     fontscale=1.5, legend_position='bottom_right'))

    hv.output(tiempos, backend='bokeh', fig='html')
    hv.save(tiempos, 'curvas_tipo.html')
def plotData(self):
    """Show a three-column layout of the data, its density, AIC scores and fitted curves.

    Reads ``self.distDF`` (column ``data`` plus one column per fitted
    distribution) and ``self.aicData`` (seven scores, indexed in the order of
    the distribution names below).  Every panel shares the same size,
    alignment, tick rotation and font sizes.  The layout is displayed with
    ``hvplot.show``; nothing is returned.
    """
    def panel_opts(**extra):
        # Fresh option dict per panel: panel-specific options first, then
        # the settings common to every panel.
        merged = dict(extra)
        merged.update(
            align='center', height=300, width=450, xrotation=45,
            fontsize={'title': 16, 'labels': 14, 'xticks': 12, 'yticks': 12})
        return merged

    scatter_panel = hv.Scatter(self.distDF[['data']], label='Scatter plot')
    scatter_panel.opts(**panel_opts())

    density_panel = hv.Distribution(self.distDF[['data']], label='Distribution')
    density_panel.opts(**panel_opts())

    names = ['Gamma', 'Rayleigh', 'Normal', 'Lognormal', 'Nakagami', 'Exponential', 'Weibull']
    scores = [(name, self.aicData[idx]) for idx, name in enumerate(names)]
    aic_panel = hv.Points(scores, ['distribution', 'score'], label='AIC_c')
    aic_panel.opts(**panel_opts(tools=['hover'], color='blue', marker='o', size=10,
                                show_grid=False, line_width=2))

    # One Points panel per fitted-distribution column of distDF.
    fitted_specs = (
        ('gammaDist', 'Gamma distribution'),
        ('rayleighDist', 'Rayleigh distribution'),
        ('normDist', 'Normal distribution'),
        ('lognormDist', 'Lognormal distribution'),
        ('nakagamiDist', 'Nakagami distribution'),
        ('exponDist', 'Exponential distribution'),
        ('weibDist', 'Weibull distribution'),
    )
    fitted = {}
    for column, title in fitted_specs:
        panel = hv.Points(self.distDF[['data', column]], label=title)
        panel.opts(**panel_opts())
        fitted[column] = panel

    # Display order differs from creation order.
    ordered = [
        aic_panel, scatter_panel, density_panel,
        fitted['exponDist'], fitted['rayleighDist'], fitted['normDist'],
        fitted['lognormDist'], fitted['weibDist'], fitted['gammaDist'],
        fitted['nakagamiDist'],
    ]
    combined = ordered[0]
    for panel in ordered[1:]:
        combined = combined + panel

    layout = hv.Layout(combined).cols(3)
    hvplot.show(layout)