def _plot_manual_exercise(df):
    """Build a column of small figures, one per exercise kind plus one for errors.

    Rows whose ``error`` column is set are split off and plotted first under
    the title ``errors`` (in red, with ``reps`` forced to 1 and a missing
    ``dt`` replaced by the last non-null ``dt`` of the input). The remaining
    rows are grouped by ``kind`` and the groups are ordered by their most
    recent ``dt``, newest first. All figures share the x range of the first one.

    Returns the bokeh ``column`` layout holding the figures.
    """
    # Imported once here instead of on every loop iteration.
    from bokeh.models import LinearAxis, Range1d

    pallete = palletes[max(palletes)]  # get last

    # todo one axis for count, one for seconds? although not really gonna work
    # maybe on separate plots?
    has_err = df['error'].notna()
    errs = df[has_err].copy()
    errs['reps'] = 1  # meh
    some_dt = df['dt'].dropna().iloc[-1]
    # Assign the result back: a chained ``errs['dt'].fillna(..., inplace=True)``
    # operates on a temporary and is not guaranteed to update ``errs``.
    errs['dt'] = errs['dt'].fillna(some_dt)
    # NOTE: the original also called ``errs['reps'].fillna(-5)`` and discarded
    # the result; 'reps' was just set to 1 for every row, so it was a no-op.
    df = df[~has_err]
    # TODO would be nice to reuse stuff to display errors as a table

    # FIXME handle none carefully here, otherwise they aren't displayed
    plots = []
    # todo hmm, reuse group hints somehow? not sure..
    # TODO helper groupby to check for None (to make sure they are handled)
    groups = list(df.groupby('kind'))
    # sort by the most recent
    groups = sorted(
        groups,
        key=lambda kind_d: max(unlocalize(kind_d[1]['dt'])),
        reverse=True,
    )

    kinds = [kind for kind, _ in groups]
    # make colors stable
    colors = dict(zip(kinds, cycle(pallete)))
    colors['errors'] = 'red'

    x_range = None
    for k, edf in chain([('errors', errs)], groups):
        color = colors[k]
        # TODO add some x jitter to declutter?
        # need to preserve the order though? I guess need to group
        p = date_figure(df, height=150, x_range=x_range)
        p.scatter(x='dt', y='reps', source=CDS(edf), legend_label='reps', color=color)

        # Series.max() skips NaN and yields NaN for an empty or all-NaN column,
        # whereas np.nanmax raises on an empty selection (e.g. no error rows).
        maxy = edf['volume'].max() * 1.1  # TODO meh
        if not np.isnan(maxy):  # I guess doesn't have volume?
            p.extra_y_ranges = {'volume': Range1d(start=0.0, end=maxy)}
            # add the second axis to the plot.
            p.add_layout(LinearAxis(y_range_name='volume'), 'right')
            p.scatter(
                x='dt',
                y='volume',
                source=CDS(edf),
                legend_label='volume',
                color='black',
                size=2,
                y_range_name='volume',
            )

        p.title.text = k
        p.y_range.start = 0
        if x_range is None:
            # every later figure reuses the x range of the first one
            x_range = p.x_range
        plots.append(p)
    # TODO not sure if I want sliding averages?
    return column(plots)
def plot_sleep_intervals(df):
    """Draw one translucent vertical bar per row, spanning sleep start to sleep end.

    The frame is first passed through ``_sleep_df``; the ``sleep_start`` and
    ``sleep_end`` values are then converted with ``_mins`` applied to their
    time-of-day (null values become ``None``). The y axis is formatted via
    ``set_hhmm_axis`` and days off are overlaid with ``add_daysoff``.

    Returns the figure.
    """
    from .core.bokeh import set_hhmm_axis

    def to_minutes(dt):
        # nulls are passed through as None, everything else goes via its time-of-day
        if pd.isnull(dt):
            return None
        return _mins(dt.time())

    sleep = _sleep_df(df)
    ints = sleep[['sleep_start', 'sleep_end']].applymap(to_minutes)

    # todo maybe instead plot angled lines? then won't need messing with minutes at all?
    # Although still useful to keep
    p = date_figure()

    # TODO also guess mint/maxt?
    axis_min = _mins(time(22, 0))
    axis_max = _mins(time(11, 0))

    # TODO need to handle nans/errors?
    p.vbar(
        source=CDS(ints),
        x='date',
        width=timedelta(1),
        bottom='sleep_start',
        top='sleep_end',
        color='black',
        alpha=0.1,
    )

    set_hhmm_axis(p.yaxis, mint=axis_min, maxt=axis_max)
    add_daysoff(p)
    return p
def add_text(plot, *, text: str, **kwargs):
    """Add a ``Text`` glyph showing ``text`` to ``plot``.

    Remaining keyword arguments are forwarded to the ``Text`` glyph.
    """
    from bokeh.models import Text

    # ugh. Label doesn't support multiline... https://github.com/bokeh/bokeh/issues/7317
    # and glyphs always want a data source, hence the single-row source below
    source = CDS({'text': [text]})
    # the glyph's `text` property names the source column rather than holding the string
    plot.add_glyph(source, Text(text='text', **kwargs))
def add_daysoff(plot, *, dates=None, bottom=None, top=None):
    """Overlay translucent per-day bars on ``plot``, coloured by workday status.

    Args:
        plot: figure to draw on.
        dates: optional collection of dates; only its min and max are used.
            When omitted, the extent comes from ``guess_range(plot, axis='x')``.
        bottom: lower edge of the bars; guessed from the plot's y range when None.
        top: upper edge of the bars; guessed from the plot's y range when None.

    Returns:
        The renderer returned by ``plot.vbar``, or ``None`` when the
        ``my.calendar.holidays`` module cannot be imported.
    """
    if bottom is None or top is None:
        b, t = guess_range(plot, axis='y')
        # todo how to extend the range a bit so there is some leeway?
        # Compare against None explicitly: `bottom or b` would throw away a
        # legitimate explicit 0.
        if bottom is None:
            bottom = b
        if top is None:
            top = t

    if dates is None:
        mind, maxd = guess_range(plot, axis='x')
    else:
        mind = min(dates)
        maxd = max(dates)

    day = timedelta(1)

    # todo need to keep boundary (-0.5, 0.5)? .. or it's automatic??
    days = pd.date_range(start=mind, end=maxd, freq='D')
    # the index name is what the data source column is looked up by (x='date')
    days.name = 'date'

    # FIXME make non-defensive, this is only temporary for tests
    try:
        import my.calendar.holidays as holidays
    except Exception as e:
        import logging
        logging.exception(e)
        return

    # todo separate column, abstract away
    # Built directly from the index; a row-wise apply() over a frame with no
    # rows does not produce a column that can be assigned back.
    col_df = pd.DataFrame(
        {'color': ['blue' if holidays.is_workday(dt) else 'red' for dt in days]},
        index=days,
    )

    # right... nice thing about BoxAnnotation is that it's infinite to the top and bottom...
    # but I have no clue how to make them togglable...
    # https://stackoverflow.com/a/56258632/706389
    # todo box annotation vs vbar??
    return plot.vbar(
        source=CDS(col_df),
        x='date',
        color='color',
        width=day,
        bottom=bottom,
        top=top,
        alpha=0.05,
        legend_label='Days off',
    )
def plot(day: str, df):
    """Plot the ``lat``/``lon`` rows of ``df`` as circles on a tiled mercator map.

    Each (lat, lon) pair is converted with ``merc``; the figure ranges are the
    min/max of the converted coordinates padded by 100 on each side.
    ``day`` is not used by the body. Returns the figure.
    """
    coords = df[['lat', 'lon']]
    projected = [merc(lat, lon) for _, (lat, lon) in coords.iterrows()]
    # todo err... swap lat and lon?
    df = pd.DataFrame(projected, columns=['mlon', 'mlat'], index=df.index)

    # range bounds supplied in web mercator coordinates
    # todo set some reasonable minimum span? otherwise if there is no movement,
    # almost nothing is displayed
    pad = 100
    mlon = df['mlon']
    mlat = df['mlat']
    x_range = (min(mlon) - pad, max(mlon) + pad)
    y_range = (min(mlat) - pad, max(mlat) + pad)

    # TODO determine range from data?
    # todo autodetect?
    # todo hmm, if I fix width, sometimes rendering fails? 20171118
    p = figure(
        title='map',
        x_axis_type='mercator',
        y_axis_type='mercator',
        x_range=x_range,
        y_range=y_range,
    )
    p.add_tile(tile_provider)

    p.circle(
        x='mlon',
        y='mlat',
        size=10,
        fill_color='blue',
        fill_alpha=0.8,
        source=CDS(df),
    )
    # I guess mimicking the same interface as google maps is ok? e.g. sidebar with
    # points and time on the left and highlight them (maybe with different colors
    # depending on whether they are in the future or in the past)
    return p
def plot(p, df):
    """Draw the rows of ``df`` as dark green quads on ``p`` and decorate the figure.

    The quad edges and transparency are read from the ``top``, ``bottom``,
    ``left``, ``right`` and ``alpha`` columns. The y axis is then set to span
    0 to 26 * 60 with a period of 60, and days off are overlaid.
    """
    # todo hmm, they overlap? maybe need to stack
    quad_columns = dict(top='top', bottom='bottom', left='left', right='right')
    p.quad(source=CDS(df), color='darkgreen', alpha='alpha', **quad_columns)

    axis_top = 26 * 60
    set_hhmm_axis(p.yaxis, mint=0, maxt=axis_top, period=60)
    add_daysoff(p)
def __init__(self, prefs=None):
    """Set up the data sources, preferences and figure for the plot.

    Args:
        prefs: optional mapping of preference overrides, applied on top of a
            deep copy of ``defaults``. ``None`` (the default) means no
            overrides; this replaces the former shared mutable ``{}`` default.
    """
    # dictionary to hold the dataframes to be rendered
    self._gene_data = dict(
        transcripts=CDS(self._placeholder()),
        exons=CDS(exon_data_frame()),
        introns=CDS(intron_data_frame()),
        labels=CDS(transcript_label_data_frame()),
    )

    # preferences: start from a private copy so the shared defaults are never touched
    self._prefs = deepcopy(defaults)
    if prefs is not None:
        self._prefs.update(prefs)

    # the transcript data from the database
    self._transcripts = None

    # flag to indicate if the gene data to display has changed
    self._dirty_flag = False

    # create the plot
    self._figure = self._create_plot()
def plot_multiple(df, *, columns, **kwargs):
    """Plot groups of columns against ``dt``, one figure per group, stacked vertically.

    Groups come from ``read_group_hints(df)``. Each column in a group is drawn
    as a scatter plus a line over its non-null rows, and every range hint from
    ``read_range_hints(df)`` for that column is drawn as a translucent
    horizontal bar. All figures share the x range of the first one.

    Args:
        df: frame with a ``dt`` column and the columns named by the hints.
        columns: currently not used by the body.
        **kwargs: forwarded to ``date_figure``.

    Returns:
        A ``gridplot`` with one figure per row.
    """
    # todo autodiscover columns somehow?
    # basically all except dates?

    # todo use multiindex for groups? not sure if possible
    # https://stackoverflow.com/questions/30791839/is-there-an-easy-way-to-group-columns-in-a-pandas-dataframe

    # todo make configurable
    from bokeh.palettes import Dark2_5 as palette  # type: ignore

    from .pandas import read_group_hints, read_range_hints
    groups = read_group_hints(df)
    range_hints = read_range_hints(df)  # todo think of a better name?..

    x_range = None
    # todo height??

    # todo for grouping, could simply take soft hints?
    # and if something is unknown, just warn and display on the plot

    plots = []
    for grp in groups:
        # todo add source to annotation?
        # todo rely on kwargs for date x axis?
        p = date_figure(x_range=x_range, **kwargs)

        # todo color rainbow??
        for f, color in zip(grp, cycle(palette)):
            # todo make this behaviour adjustable?
            fdf = df[df[f].notna()]

            # TODO rely on dt index? it can be non-unique so it should be fine...
            p.scatter(x='dt', y=f, source=CDS(data=fdf), color=color, legend_label=f)
            p.line(x='dt', y=f, source=CDS(data=fdf), color=color, legend_label=f)
            # TODO axis labels

            # hmm it actually uses glucose level as an example
            # https://docs.bokeh.org/en/latest/docs/user_guide/annotations.html#box-annotations
            # TODO from bokeh.sampledata.glucose import data -- could use for demo/testing
            rhs = range_hints.get(f, [])
            if len(rhs) == 0 or len(fdf) == 0:
                # No hints, or no data to span the bars over: min()/max() of an
                # empty column would raise, so there is nothing to draw.
                continue

            # The horizontal extent is the same for every hint of this column.
            # hmm, without left=, it plots at timestamp 0 =/
            left = fdf['dt'].min()
            right = fdf['dt'].max()

            # TODO if no color, just vary color + ???
            for rh in rhs:
                # right. annotation (BoxAnnotation) works, but wasn't sure how to make it toggable
                extras: Dict[str, Optional[str]] = dict(color=None)
                col = rh.color
                if col is None:
                    # fall back to the colour of the column itself
                    col = color
                    if len(rhs) > 1:
                        logging.warning(
                            "Multiple ranges for %s don't have colour, this will result in ranges overlapping: %s",
                            f,
                            rhs,
                        )
                        # at least make the separators visible
                        extras = dict(color='black', line_dash='dotted')

                # todo hide by default?
                p.hbar(
                    left=left,
                    right=right,
                    # eh, this is awkward.. hbar wants centre + height rather than low/high
                    y=(rh.low + rh.high) / 2,
                    height=rh.high - rh.low,
                    fill_color=col,
                    fill_alpha=0.1,
                    legend_label=f'{f} ranges',
                    **extras,
                )

        p.title.text = str(grp)

        if x_range is None:
            # later figures are linked to the first figure's x range
            x_range = p.x_range
        plots.append(p)

    return gridplot([[x] for x in plots])
def rolling(*, x: str, y: str, df, avgs: Sequence[Avg] = ('7D', '30D'), legend_label=None, context: Optional[RollingResult] = None, **kwargs) -> RollingResult:
    """Scatter ``y`` against the index of ``df`` and overlay rolling means.

    Rows are split by whether the index and ``df[y]`` are non-null and whether
    an ``error`` value is present:

    * index is null: listed in the errors table only;
    * index present, ``y`` null: listed in the table and marked on the plot
      (rows without an error get ``'<y> is nan/null'`` as the error);
    * index and ``y`` present, error set: listed in the table and highlighted
      as a warning on the plot; still part of the scatter, but left out of the
      rolling means;
    * everything else: plotted and averaged.

    Args:
        x: name used for the x field of the glyphs and for sorting the table.
        y: column to plot.
        df: input frame; it is copied, so the caller's frame is not modified.
        avgs: rolling window specs; ``None`` entries are skipped, and the raw
            scatter is only drawn when no entry is ``None``.
        legend_label: legend label, defaults to ``y``.
        context: an existing ``RollingResult`` to draw into; a new one is
            created when omitted.
        **kwargs: forwarded to the scatter and line glyphs.

    Returns:
        The ``RollingResult`` that was drawn into.
    """
    # TODO maybe use a special logging handler, so everything logged with warning level gets displayed?
    errors = []

    # todo ugh. the same HPI check would be nice..
    tzs = set(df.index.map(lambda ts: getattr(ts, 'tzinfo', None)))  # meh
    if len(tzs) > 1:
        errors.append(
            f'WARNING: a mixture of timezones: {tzs}. You might want to unlocalize() them first.'
        )
    elif len(tzs) == 1:
        [_tz] = tzs
        if _tz is not None:
            # todo not really sure about that.. maybe it's okay, although UTC might be wrong as well
            errors.append(
                f'WARNING: detected timezone: {_tz}. You might want to unlocalize() first.'
            )

    # The 'error' column is written to below; work on a copy so the caller's
    # frame is left untouched.
    df = df.copy()

    if legend_label is None:
        legend_label = y

    # meh... don't think I like it
    # TODO def test this
    if context is None:
        plot = date_figure(df=df)
        ctx = RollingResult(
            layout=column([plot], sizing_mode='stretch_width'),
            plots=[],
            figures=[plot],
        )
    else:
        ctx = context

    plot = ctx.figure
    plots = ctx.plots
    layouts = ctx.layout.children

    # todo assert datetime index? test it too
    # todo although in theory it doesn't have to be datetimes with the appropriate avgs??

    has_x = df.index.notna()
    has_y = df[y].notna()
    if 'error' in df.columns:
        err = df['error'].notna()
    else:
        # no error column at all: nothing is flagged
        err = np.zeros(len(df), dtype=bool)

    for_table = ~has_x  # case 1 is handled

    # case 2: set proper error for ones that don't have y
    df.loc[has_x & ~has_y & ~err, 'error'] = f'{y} is nan/null'
    # now case 2 and 3 are the same

    # case 3
    case_3 = has_x & ~has_y
    for_table |= case_3
    for_marks = case_3

    # case 4, 5
    ok = has_x & has_y
    case_4 = ok & err
    for_table |= case_4
    for_warn = case_4

    dfm = df.loc[for_marks]
    dfe = df.loc[for_table]
    dfw = df.loc[for_warn]
    df = df.loc[ok]

    if len(dfm) > 0:
        # todo meh.. how to make the position absolute??
        some_y = df[y].quantile(0.8)  # to display kinda on top, but not too high
        if np.isnan(some_y):
            # otherwise fails during JSON serialization
            some_y = 0.0
        plot.scatter(
            source=CDS(dfm),
            x=x,
            y=some_y,
            legend_label='errors',
            line_color='red',
            fill_color='yellow',  # ??
            marker='circle_cross',
            size=10,
        )

    if len(dfe) > 0:
        errors.append(f'Also encountered {len(dfe)} errors:')

    from bokeh.models.widgets.markups import Div
    # first a summary for the most important warnings/errors
    # todo append later stuff as well, there are some warnings during means
    for e in errors:
        layouts.append(
            Div(
                text=html.escape(e),
                # NOTE(review): 'strong' is not a standard CSS font-weight
                # value ('bold' is); left as-is, confirm the intent.
                style={'color': 'red', 'font-weight': 'strong'},
            )
        )

    if len(dfe) > 0:
        # todo could even keep the 'value' errors and display below too.. but for now it's ok
        # ok, if it respected newlines, would be perfect
        # for now this is 'fine'...

        # todo maybe should display all points, highlight error ones as red (and it sorts anyway so easy to overview?)
        # todo would be nice to highlight the corresponding points in table/plot
        from bokeh.models.widgets import DataTable, DateFormatter, TableColumn

        # todo DataCube?? even more elaborate
        dfe = dfe.reset_index()  # todo ugh. otherwise doesn't display the index at all?
        dfe = dfe.sort_values(by=x)

        # todo maybe display 'error' as the first col?
        # todo speed_avg could have less digits (guess by the dispersion or something??)

        # TODO horrible, but the js bits of bokeh compute nonsense for column widths
        # todo set monospace font??
        one_char = 10  # pixels

        def datatable_columns(df):
            # One TableColumn per frame column, with a width guessed from the dtype.
            for c, t in df.dtypes.items():
                formatter = None
                dtype_name = str(t)

                # TODO also use col name.. then won't have to handle nans!
                width = 15  # in characters
                # for fixed width types, we can have something kind of reasonable
                if dtype_name.startswith('float'):
                    longest = df[c].dropna().map(str).str.len().max()
                    # max() of an all-NaN column is NaN
                    width = 4 if np.isnan(longest) else longest

                if dtype_name.startswith('datetime'):
                    formatter = DateFormatter(format='%Y%m%d %H%M%S %Z', nan_format='Nan')
                    width = 15
                elif dtype_name.startswith('timedelta'):
                    # TODO warn if df contains stuff with duration >1D?
                    # without nan_format, it results in NaN:Nan:Nan
                    formatter = DateFormatter(format='%H:%M:%S', nan_format='Nan')
                    width = 8

                tc = TableColumn(
                    field=c,
                    title=c,
                    **({} if formatter is None else dict(formatter=formatter)),
                    width=width * one_char,
                )
                yield tc

        # TODO hmm, if we reuse the data source, editing & selection might work?
        errors_table = DataTable(
            source=CDS(dfe),
            columns=list(datatable_columns(dfe)),
            # todo ugh. handle this properly, was too narrow on the sleep plots
            editable=True,
            width=2000,
            # default ends up in trimmed table content
            autosize_mode='none',
        )
        layouts.append(errors_table)

    if len(dfw) > 0:
        plot.circle(source=CDS(dfw), x=x, y=y, legend_label='warnings', size=20, color='yellow')

    # todo warn if unsorted?
    df = df.sort_index()

    if len(df) == 0:
        # add a fake point, so at least plotting doesn't fail...
        df = pd.DataFrame([{
            x: datetime(year=2000, month=1, day=1),
            y: 0.0,
        }]).set_index(x)
        avgs = ['3D' for _ in avgs]
        # FIXME need to add this to errors as well, or at least title..
        # TODO need to add a better placeholder, timestamp 0 really messes things up
        # NOTE(review): `df` is already the placeholder frame at this point, so
        # that is what the message shows; message kept unchanged.
        warnings.warn(f'No data points for {df}, empty plot!')

    if None not in avgs:
        plots.append(
            plot.scatter(x=x, y=y, source=CDS(df), legend_label=legend_label, **kwargs)
        )

    # only stuff without errors/warnings participates in the avg computation
    if 'error' in df.columns:  # meh
        df = df[df['error'].isna()]

    for period in [a for a in avgs if a is not None]:
        dfy = df[[y]]
        index_dtype = str(dfy.index.dtype)
        if index_dtype == 'object':
            logging.error(
                f"{dfy.dtypes}: index type is 'object'. You're likely doing something wrong"
            )
        if 'datetime64' in index_dtype:
            # you're probably doing something wrong otherwise..
            # todo warn too?
            # check it's a valid period
            pd.to_timedelta(period)
        # TODO how to fill the missing values??
        # a sequence of consts would be a good test for it
        # todo why would index be na at this point? probably impossible?
        dfa = dfy[dfy.index.notna()].rolling(period).mean()
        # TODO assert x in df?? or rolling wrt to x??

        # somehow plot.line works if 'x' is index? but df[x] doesnt..

        # todo different style by default? thicker line? not sure..
        plots.append(
            plot.line(
                x=x,
                y=y,
                source=CDS(dfa),
                legend_label=f'{legend_label} ({period} avg)',
                **kwargs,
            )
        )

    plot.title.text = f'x: {x}, y: {y}'
    # TODO axis labels instead?
    return ctx
def scatter_matrix(
    df,
    *,
    xs: Sequence[str] = None,
    ys: Sequence[str] = None,
    width=None,
    height=None,
    regression=True,
    **kwargs,
):
    """Build a grid of pairwise scatter plots, optionally with OLS regression lines.

    Args:
        df: non-empty frame to plot.
        xs: columns for the horizontal direction (default: all columns).
        ys: columns for the vertical direction (default: all columns); they
            are reversed before use.
        width: total grid width; each cell gets ``width // len(xs)``.
        height: total grid height; each cell gets ``height // len(ys)``.
        regression: when true, fit ``y ~ x`` with statsmodels for every
            numeric off-diagonal cell and draw the fitted line and R2.
        **kwargs: accepted but not used by the body.

    Numeric columns are sorted before non-numeric ones on both axes; cells
    involving a non-numeric column show a 'Not numeric' label instead.

    Returns:
        The bokeh ``gridplot``.
    """
    assert len(df) > 0, 'TODO handle this'
    # FIXME handle empty df

    source = CDS(df)  # TODO what about non-numeric stuff?

    xs = df.columns if xs is None else xs
    ys = df.columns if ys is None else ys
    ys = list(reversed(ys))  # reorder to move meaningful stuff to the top left corner

    isnum = lambda c: is_numeric_dtype(df.dtypes[c])
    # reorder so non-numeric is in the back
    # todo mode to drop non-numeric? not sure.. definitely can drop 'error' and datetimish?
    xs = sorted(xs, key=isnum, reverse=True)
    ys = sorted(ys, key=isnum, reverse=True)

    # TODO not sure I wanna reuse axis?
    def make(xc: str, yc: str):
        # One grid cell: a scatter for numeric pairs, a text placeholder otherwise.
        p = figure(df=df)
        # TODO not sure if I even want the diagonal cells... move to the very end?
        if isnum(xc) and isnum(yc):
            p.scatter(x=xc, y=yc, source=source, size=3)
        else:
            # TODO ugh, doesn't want to show the label without any points??
            # FIXME how to make sure text fits into the plot??
            add_text(
                p,
                x=0.0,
                y=0.0,
                text='Not numeric',
                text_color='red',
            )
        p.xaxis.axis_label = xc
        p.yaxis.axis_label = yc
        return p

    grid = [[make(xc=x, yc=y) for x in xs] for y in ys]
    from bokeh.layouts import gridplot

    # A row holds len(xs) cells and there are len(ys) rows, so each dimension
    # is divided by its own count; max(..., 1) avoids dividing by zero.
    w1 = None if width is None else width // max(len(xs), 1)
    h1 = None if height is None else height // max(len(ys), 1)
    grid_res = gridplot(grid, plot_width=w1, plot_height=h1)

    # TODO might be useful to include/exclude specific cols (e.g. datetime) while keeping them in annotations

    # TODO add the presence of the grid to the 'visual tests'
    # but if I switch it to raw bokeh -- it has Grid class.. might need to mess with
    # also maybe add extra axis under each plot in the grid? easier for a huge matrix of plots
    # some code in old dashboard
    if not regression:
        return grid_res

    # todo this would be need for plotly as well?
    import numpy as np
    import statsmodels.formula.api as smf  # type: ignore
    from bokeh.models import Slope

    for plot in chain.from_iterable(grid):
        gs = plot.renderers
        if len(gs) == 0:
            # must be non-numeric? meh though
            continue
        [g] = gs
        xx = g.glyph.x
        yy = g.glyph.y

        if xx == yy:
            # diagonal thing, e.g. histogram. compute some stats??
            # (the 'Not numeric' text glyph also lands here: its x and y are both 0.0)
            continue

        # Treat +/-inf as missing. Done explicitly rather than through the
        # 'mode.use_inf_as_null' option, which pandas deprecated and later removed.
        # FIXME proper error handling, display number of dropped items?
        dd = df[[xx, yy]].replace([np.inf, -np.inf], np.nan).dropna()

        # todo would be nice to display stats on the number of points dropped
        udd = dd.drop_duplicates()
        if len(udd) <= 1:
            # can't perform a reasonable regression then
            add_text(
                plot,
                x=0.0,
                y=0.0,
                text='ERROR: no points to correlate',
                text_color='red',
            )
            continue

        # Fit against fixed names so that column names which are not valid
        # formula identifiers (spaces, dashes, ...) cannot break the formula.
        reg = pd.DataFrame({'x': dd[xx].to_numpy(), 'y': dd[yy].to_numpy()})
        res = smf.ols('y ~ x', data=reg).fit()
        intercept = res.params['Intercept']
        slope = res.params['x']
        r2 = res.rsquared

        ## TODO is it really the best way to figure out relative position??
        relx = 0.01
        rely = 0.1

        # todo highlight high enough R2?
        minx, maxx = min(dd[xx]), max(dd[xx])
        miny, maxy = min(dd[yy]), max(dd[yy])
        # todo font size dependent on width?? ugh.
        txt = f'R2 = {r2:.4f}\nY ~ {slope:.3f} X'

        # todo need to add various regression properties, like intercept, etc
        # TODO hopefully this overlays correctly?? not sure about nans, again
        sl = Slope(gradient=slope, y_intercept=intercept, line_color='green', line_width=3)
        plot.add_layout(sl)
        add_text(
            plot,
            text=txt,
            x=minx + (maxx - minx) * relx,
            y=miny + (maxy - miny) * rely,
            text_color=g.glyph.line_color,
        )

    # TODO dynamic resizing would be nice
    return grid_res