def basicLinePlot(
        y,  # [n_sigs, n_bins] array (each signal is 1 row)
        x=None,  # either [n_bins] array-like signal, or [n_sigs, n_bins] signal
        title='',
        xlbl='',
        ylbl='',
        names=None,  # list of legend entries
        show_leg=True,  # whether to show leg
        plot=True):
    """
    Plot one line trace per row of `y`. No frills (yet).

    :param y: 1D signal or 2D array where each row is one signal
    :param x: optional x-values. A 1D sequence is shared by every signal; a 2D
        array supplies one row of x-values per signal (a single-row 2D array is
        shared by all signals)
    :param title: plot title
    :param xlbl: x-axis title
    :param ylbl: y-axis title
    :param names: legend entries, one per signal. Defaults to 'S_1', 'S_2', ...
    :param show_leg: whether the legend is shown
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    y = np.atleast_2d(y)
    n_sigs, n_bins = y.shape

    # `is None` (not `== None`) so array-like `names` do not trigger an
    # elementwise comparison with an ambiguous truth value.
    if names is None:
        names = ['S_%d' % (n + 1) for n in range(n_sigs)]

    # A 2D x carries per-signal x-values; anything else is shared as-is.
    x_rows = None
    if x is not None and np.ndim(x) == 2:
        x_rows = np.asarray(x)

    traces = []
    for n, sig in enumerate(y):
        if x_rows is None:
            sig_x = x
        elif x_rows.shape[0] == 1:
            sig_x = x_rows[0]
        else:
            sig_x = x_rows[n]
        traces.append(go.Scatter(y=sig, x=sig_x, name=names[n]))

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        hovermode='closest',
        showlegend=show_leg,
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def plotTable2(
        data,
        top_headers,
        width=None,
        plot=True,
        title=None,
):
    """
    Wrapper for the plotly `go.Table` trace.

    NOTE: this is NOT compatible w/ dashboards as the plotly table object doesnt
    have a ._data field & thus cant easily be jsonified

    :param data: cell values, passed to go.Table as `cells.values`
    :param top_headers: header labels, passed to go.Table as `header.values`
    :param width: figure width (None lets plotly decide)
    :param plot: forwarded unchanged to plotOut
    :param title: figure title
    :return: whatever plotOut(fig, plot) returns
    """
    # One alignment entry per column instead of a hard-coded 5, so tables with
    # more than five columns are left-aligned throughout.
    align = ['left'] * len(top_headers)

    trace = go.Table(
        header=dict(
            values=top_headers,
            line=dict(color='#7D7F80'),
            fill=dict(color='#a1c3d1'),
            font=dict(color='white', size=12),
            height=None,  # row-height
            align=align),
        cells=dict(
            values=data,
            line=dict(color='#7D7F80'),
            fill=dict(color='#EDFAFF'),
            align=align),
        hoverinfo='x+y+name')

    layout = dict(width=width, height=None, title=title)

    # The figure is a plain dict here (not go.Figure), as in the original.
    fig = dict(data=[trace], layout=layout)
    return plotOut(fig, plot)
def plotDF(
        df,  # pandas DF
        title='',  # title of plot
        ylbl='',  # ylabel
        xlbl=None,  # if None, uses df.index.name
        linemode='lines',  # 'lines'/'markers'/'lines+markers'
        cat_col=None,  # if name, then shades BG according to the label
        opacity=.7,  # transparency of lines. [0.0, 1.0]
        norm=None,  # None or input to norm_mat
        plot=True,  # forwarded to plotOut
):
    """
    Plot every column of a pandas DataFrame as a line against its index.

    NOTE: see also plotly's cufflinks package which makes pandas plotting super easy!
        cf.go_offline()
        df.iplot(kind='scatter')

    :param df: DataFrame to plot. It is copied first, so the caller's frame is
        never modified
    :param title: plot title
    :param ylbl: y-axis title
    :param xlbl: x-axis title; if None, df.index.name is used
    :param linemode: plotly scatter mode
    :param cat_col: column name whose values are handed to labelsShading to
        build background shapes
    :param opacity: alpha applied to every line colour
    :param norm: None for no normalisation. A string is passed to norm_mat as
        `method`; any other non-None value falls back to 'zscore' (the only
        behaviour of earlier versions)
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    # Work on a copy: the category conversion and normalisation below assign
    # into the frame and previously leaked back to the caller.
    df = df.copy()
    ncols = df.shape[1]

    # convert cat columns to numeric columns
    for col in df.columns:
        if df[col].dtype.name == 'category':
            df[col] = df[col].cat.codes

    # make line colors. The palette lookup is clamped to 3..12 and the colours
    # are cycled, so frames with more than 12 columns no longer raise KeyError.
    colors = cl.scales[str(min(max(3, ncols), 12))]['qual']['Set3']
    tcols = ['rgba%s,%.2f)' % (c[3:-1], opacity) for c in colors]

    # normalize columns
    if norm is not None:
        method = norm if isinstance(norm, str) else 'zscore'
        for col in df.columns:
            df[col] = norm_mat(df[col].values, method=method)

    traces = [
        go.Scatter(x=df.index,
                   y=df[col].values,
                   name=col,
                   mode=linemode,
                   line={"color": tcols[i % len(tcols)]})
        for i, col in enumerate(df.columns)
    ]

    if xlbl is None:
        xlbl = df.index.name

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        showlegend=True,
    )

    # shade background based on label
    if cat_col is not None:
        layout.shapes = labelsShading(df[cat_col].values)

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def _plotSubplots(trace_array,
                  vert_spacing=.1,
                  title='',
                  ylbl='',  # currently ignored (was buggy)
                  xlbl='',  # currently ignored
                  sp_titles=None,  # 2d np array of strings for subplot titles
                  plot=True):
    """
    Internal function to make subplots based on passed traces.

    :param trace_array: 2D np array; cell [r, c] holds an iterable of traces
        for the subplot in row r, column c
    :param vert_spacing: vertical spacing handed to make_subplots
    :param title: figure title
    :param ylbl: accepted for interface compatibility; not applied
    :param xlbl: accepted for interface compatibility; not applied (setting the
        x-axis title was observed to ruin the shared x-axis)
    :param sp_titles: optional 2D np array of subplot titles; None means no
        subplot titles
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    n_rows, n_cols = trace_array.shape

    # The default sp_titles=None used to crash on `.flatten()`.
    titles = None if sp_titles is None else sp_titles.flatten().tolist()

    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        shared_xaxes=True,
        vertical_spacing=vert_spacing,
        subplot_titles=titles,
    )

    # plotly rows/cols are 1-based
    for r in range(n_rows):
        for c in range(n_cols):
            for trace in trace_array[r, c]:
                fig.append_trace(trace, r + 1, c + 1)

    fig.layout.title = title
    fig.layout.showlegend = True

    return plotOut(fig, plot)
def plotTable(
        data,
        top_headers=None,  # only required if data is list/nparray, not for pandas df
        width=None,
        plot=True,
        title=None,
):
    """
    Wrapper for the plotly figure-factory table function.

    :param data: pandas DataFrame, or a list / np array of rows
    :param top_headers: column headers. Taken from the DataFrame columns when
        `data` is a DataFrame; required otherwise
    :param width: figure width
    :param plot: forwarded unchanged to plotOut
    :param title: figure title
    :raises ValueError: if `data` is not a DataFrame and `top_headers` is None
    :return: whatever plotOut(fig, plot) returns
    """
    import pandas as pd

    if isinstance(data, pd.DataFrame):
        top_headers = data.columns
        tbl_data = data.values
    else:
        # Previously this path left `tbl_data` undefined and crashed.
        if top_headers is None:
            raise ValueError(
                'top_headers is required when data is not a pandas DataFrame')
        tbl_data = np.atleast_2d(np.asarray(data))

    # Render every cell as text truncated to 7 characters.
    # TODO: this should only be done for numeric datatypes
    tbl_data = tbl_data.astype('|S7').astype(str)

    # create_table takes the header as the first row of the matrix
    inp_data = np.vstack((top_headers, tbl_data))

    fig = ff.create_table(inp_data, hoverinfo='skip')

    fig.layout.width = width
    fig.layout.title = title
    fig.layout.margin = {'b': 80, 'r': 80}

    return plotOut(fig, plot)
def plotMultiPR(y_true,  # list of true labels
                y_scores,  # array of scores for each class of shape [n_samples, n_classes]
                title='Multiclass PR Plot',
                labels=None,  # list of labels for each class
                threshdot=None,  # score threshold at which to draw a marker (None = no marker)
                plot=True,  # forwarded to plotOut
                ):
    """
    Makes a multiclass precision-recall plot (recall on x, precision on y).

    NOTE: a later definition of plotMultiPR in this file rebinds the name, so
    this version is only reachable before that definition executes.

    :param y_true: integer class label per sample; class i is compared against
        column i of y_scores
    :param y_scores: [n_samples, n_classes] scores (a 1D vector is treated as a
        single column)
    :param title: plot title
    :param labels: legend label per class. Defaults to 'C1', 'C2', ...
    :param threshdot: if given, a marker is drawn on each curve at the
        threshold closest to this value
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:
        y_scores = np.atleast_2d(y_scores).T
    _, n_classes = y_scores.shape

    if n_classes == 1:
        # needed to avoid inverting when doing binary classification: the only
        # column is scored against `y_true == 0`, so the scores are negated...
        y_scores = -1 * y_scores
        # ...and the requested threshold has to move to the same negated scale
        if threshdot is not None:
            threshdot = -1 * threshdot

    # calc PR curves & area under them
    precision = dict()
    recall = dict()
    pr_auc = dict()
    thresh = dict()
    thresh_txt = dict()
    for i in range(n_classes):
        precision[i], recall[i], thresh[i] = sk.metrics.precision_recall_curve(
            y_true == i, y_scores[:, i])
        # Step-wise area (average precision): sum of (R_n - R_{n-1}) * P_n.
        # recall is returned in decreasing order, hence the leading minus.
        pr_auc[i] = -np.sum(np.diff(recall[i]) * precision[i][:-1])
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes + 1)]
    labels = [str(x) for x in labels]  # convert to str

    # make traces
    traces = [
        go.Scatter(y=precision[i],
                   x=recall[i],
                   name=labels[i] + '. AUC= %.2f' % (pr_auc[i]),
                   text=thresh_txt[i],
                   legendgroup=str(i),
                   line={'width': 1})
        for i in range(n_classes)
    ]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i] - threshdot)).argmin()
            traces += [
                go.Scatter(x=[recall[i][c_indx]] * 2,
                           y=[precision[i][c_indx]] * 2,
                           mode='markers',
                           name='Threshold',
                           legendgroup=str(i),
                           showlegend=False)
            ]

    # make layout. Titles follow the plotted data: x is recall, y is precision.
    layout = go.Layout(
        title=title,
        xaxis={'title': 'Recall = TPR = P(yp=1 | y=1)'},
        yaxis={'title': 'Precision = P(y=1 | yp=1)'},
        legend=dict(x=1),
        hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def scatterMatrix(df, title='Scatterplot Matrix', plot=True):  # plot is forwarded to plotOut
    """
    Build a lower-triangular scatterplot matrix for the columns of `df`:
    histograms on the diagonal, pairwise scatter plots below it.

    :param df: DataFrame whose columns are compared pairwise
    :param title: figure title
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    cols = df.columns
    N = len(cols)

    fig = py.tools.make_subplots(rows=N, cols=N)

    # Subplot positions are 1-based; only cells with col <= row are filled.
    for row in range(1, N + 1):
        row_series = df[cols[row - 1]]
        for col in range(1, row + 1):
            if row == col:
                # diagonal: histogram of the column itself
                cell_fig = plotHist(
                    row_series,  # 1D list/np vector of data
                    maxData=500,  # max # of points to plot above histogram
                    plot=False,  # only build the figure
                    rm_outliers=True,  # remove outliers
                    density=True,  # plot PDF rather than count
                    boxplot=0,
                    scatter=0,
                    diff_tst=0)
            else:
                # below the diagonal: row variable vs. column variable
                cell_fig = corrPlot(
                    row_series,
                    df[cols[col - 1]],
                    maxdata=500,  # max # of points to plot
                    addCorr=False,  # no correlation statistics
                    addCorrLine=False,  # no correlation line
                    addXYline=False,  # no y=x line
                    plot=False,  # only build the figure
                )
            for trace in cell_fig.data:
                fig.append_trace(trace, row, col)

    fig['layout'].update(title=title)
    fig['layout'].update(showlegend=False)

    # Label the y-axis of the first subplot in each row with the column name.
    for n in range(1, N + 1):
        axis_key = 'yaxis' + str((n - 1) * N + 1)
        fig['layout'][axis_key].update(title=cols[n - 1])

    return plotOut(fig, plot)
def plotMultiROC(y_true,  # list of true labels
                 y_scores,  # array of scores for each class of shape [n_samples, n_classes]
                 title='Multiclass ROC Plot',
                 labels=None,  # list of labels for each class
                 threshdot=None,  # score threshold at which to draw a marker (None = no marker)
                 plot=True,  # forwarded to plotOut
                 ):
    """
    Makes a multiclass ROC plot.

    NOTE: a later definition of plotMultiROC in this file rebinds the name, so
    this version is only reachable before that definition executes.

    :param y_true: integer class label per sample; class i is compared against
        column i of y_scores
    :param y_scores: [n_samples, n_classes] scores (a 1D vector is treated as a
        single column)
    :param title: plot title
    :param labels: legend label per class. Defaults to 'C1', 'C2', ...
    :param threshdot: if given, a marker is drawn on each curve at the
        threshold closest to this value
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:
        y_scores = np.atleast_2d(y_scores).T
    _, n_classes = y_scores.shape

    if n_classes == 1:
        # needed to avoid inverting when doing binary classification: the only
        # column is scored against `y_true == 0`, so the scores are negated...
        y_scores = -1 * y_scores
        # ...and the requested threshold has to move to the same negated scale
        if threshdot is not None:
            threshdot = -1 * threshdot

    # calc ROC curves & AUC
    fpr = dict()
    tpr = dict()
    thresh = dict()
    thresh_txt = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], thresh[i] = sk.metrics.roc_curve(y_true == i, y_scores[:, i])
        roc_auc[i] = sk.metrics.auc(fpr[i], tpr[i])
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    # The default labels=None used to crash in the str() conversion below.
    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes + 1)]
    labels = [str(x) for x in labels]  # convert labels to str

    # make traces
    traces = [
        go.Scatter(y=tpr[i],
                   x=fpr[i],
                   name=labels[i] + '. AUC= %.2f' % (roc_auc[i]),
                   text=thresh_txt[i],
                   legendgroup=str(i),
                   line={'width': 1})
        for i in range(n_classes)
    ]
    traces += [
        go.Scatter(y=[0, 1],
                   x=[0, 1],
                   name='Random classifier',
                   line={'width': 1, 'dash': 'dot'})
    ]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i] - threshdot)).argmin()
            traces += [
                go.Scatter(x=[fpr[i][c_indx]] * 2,
                           y=[tpr[i][c_indx]] * 2,
                           mode='markers',
                           name='Threshold',
                           legendgroup=str(i),
                           showlegend=False)
            ]

    # make layout
    layout = go.Layout(
        title=title,
        xaxis={'title': 'FPR'},
        yaxis={'title': 'TPR'},
        legend=dict(x=1),
        hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def basicHeatmap(z, x=None, y=None, title='', xlbl='', ylbl='', plot=True):
    """
    Plots a basic heatmap.

    :param z: 2D values passed to go.Heatmap as `z`
    :param x: x coordinates passed to go.Heatmap; None is sent as an empty list
    :param y: y coordinates passed to go.Heatmap; None is sent as an empty list
    :param title: plot title
    :param xlbl: x-axis title
    :param ylbl: y-axis title
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    # None replaces the former mutable `[]` defaults; the value handed to
    # plotly is still an empty list when nothing is supplied.
    x = [] if x is None else x
    y = [] if y is None else y

    traces = [go.Heatmap(z=z, x=x, y=y)]

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def basicBarPlot(
        data,  # list of #'s
        names=None,  # xtick labels. Can be numeric or str
        title='',
        ylbl='',
        xlbl='',
        text=None,  # list of txt vals or 'numb' for numbers
        orient=None,  # currently unused
        sort=False,  # if True, sorts from greatest to least
        line=None,  # add line perpendicular to bars (eg to show mean)
        color='rgb(158,202,225)',  # barplot internal color
        width=None,  # plot width. If None, autoscales
        plot=True):
    """
    Makes a basic bar plot where data is [n,1] list of values. No averaging/etc...
    For that see barPlot or propBarPlot

    EX: psp.basicBarPlot([1,2,3,2])

    :param data: bar heights
    :param names: x tick labels
    :param title: plot title
    :param ylbl: y-axis title
    :param xlbl: x-axis title
    :param text: per-bar text, or the string 'numb' to label each bar with its
        value
    :param orient: accepted for interface compatibility; not used
    :param sort: if True, bars (and their names / per-bar text) are ordered
        from greatest to least
    :param line: value handed to hline to draw a reference line; None or False
        draws nothing
    :param color: bar fill colour
    :param width: figure width
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    per_bar_text = text is not None and not isinstance(text, str)

    if sort:
        order = np.argsort(data)[::-1]
        data = np.array(data)[order]
        if names is not None:
            names = np.array(names)[order]
        # Keep per-bar text attached to its bar; it used to stay in the
        # original order and ended up on the wrong bars.
        if per_bar_text and len(text) == len(data):
            text = np.array(text)[order]

    # isinstance guard: comparing an array-like `text` to a string would be an
    # elementwise comparison with an ambiguous truth value.
    if isinstance(text, str) and text == 'numb':
        text = [str(x) for x in data]

    traces = [
        go.Bar(x=names,
               y=data,
               text=text,
               textposition='auto',
               marker=dict(
                   color=color,
                   line=dict(color='rgb(8,48,107)', width=1.5),
               ),
               opacity=0.6)
    ]

    layout = go.Layout(
        title=title,
        yaxis={'title': ylbl},
        xaxis={'title': xlbl},
        hovermode='closest',
        width=width,
    )

    # Explicit None/False test so a reference line at 0 is still drawn.
    if line is not None and line is not False:
        layout.shapes = [hline(line)]

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def tornadoPlot(
        vals,  # in Nx3 array, where columns are [low_val, orig_val, high_val]
        names,  # parameter names (list of str)
        title,
        width=40,
        xlbl='Output node probability',
        plot=True):
    """
    Makes a tornado plot in plotly.

    Each row of `vals` yields two thick horizontal segments on the row of its
    parameter name: a green one from orig_val to high_val and a red one from
    low_val to orig_val.

    :param vals: Nx3 array of [low_val, orig_val, high_val]
    :param names: parameter name per row
    :param title: plot title
    :param width: line width of the segments
    :param xlbl: x-axis title
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    green_segments = []  # orig -> high
    red_segments = []  # low -> orig
    for idx, row in enumerate(vals):
        label = names[idx]
        green_segments.append(
            go.Scatter(x=row[1:],
                       y=[label] * 2,
                       name=label,
                       legendgroup='pos_change',
                       line={'color': 'green', 'width': width}))
        red_segments.append(
            go.Scatter(x=row[:2],
                       y=[label] * 2,
                       name=label,
                       legendgroup='neg_change',
                       line={'color': 'red', 'width': width}))

    # all positive-change traces first, then all negative-change traces
    traces = green_segments + red_segments

    y_axis = {'position': .5, 'autorange': 'reversed'}
    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis=y_axis,
        hovermode='closest',
        showlegend=False,
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def plotPolar(
        data,  # N-d list/numpy array
        names=None,  # names of cols in data (ex:['A', 'B']
        scatter=True,  # whether to do polar scatter plot. Only works if N=1
        maxData=1000,  # max # of points to plot above histogram (if too high, it will be slow)
        hist=True,  # 1/0 whether to plot histogram of points
        numbins=40,  # bins in histogram
        normHist=True,  # whether to normalize histogram
        title='Polar Distribution',  # title of plot
        plot=True):  # forwarded to plotOut
    """
    This plots a polar plot of data in plotly

    Usage:
    x1 = np.random.uniform(-np.pi, np.pi, (100))
    x2 = np.random.uniform(-np.pi, np.pi, (200))
    plotPolar([x1,x2], names=['A', 'B'], numbins=50)

    :param data: a single 1D vector of angles in radians, a 2D numeric array
        (one series per row), or a list of 1D vectors of possibly different
        lengths
    :param names: one name per series. Defaults to '1', '2', ...
    :param scatter: draw the individual points around the histogram; only done
        when there is exactly one series
    :param maxData: at most this many points are drawn in the scatter
    :param hist: draw the angular histogram outline
    :param numbins: number of histogram bins over [-pi, pi]
    :param normHist: passed to np.histogram as `density`
    :param title: plot title
    :param plot: forwarded unchanged to plotOut
    :raises ValueError: if `data` contains no series
    :return: whatever plotOut(fig, plot) returns
    """
    ## Basic formatting: normalise every input form to a list of 1D arrays
    if isinstance(data, np.ndarray) and np.issubdtype(data.dtype, np.number):
        series = list(np.atleast_2d(data))
    else:
        items = list(data)
        if items and np.isscalar(items[0]):
            # a flat list of numbers is a single series
            series = [np.asarray(items, dtype=float)]
        else:
            series = [np.asarray(col, dtype=float).ravel() for col in items]

    N = len(series)
    if N == 0:
        raise ValueError('data must contain at least one series')
    Lx = [len(col) for col in series]

    if names is None:
        names = [str(i + 1) for i in range(N)]

    # make sure all data in radians (one warning per offending series)
    for col in series:
        if col.size and (np.min(col) < -np.pi or np.max(col) > np.pi):
            print('All data must be within +-pi')

    if N > 1:
        showleg = True
        # Palette key is clamped to the range used elsewhere in this file for
        # Set1 (3..9) and colours are cycled, so large N does not raise.
        palette = cl.scales[str(min(max(N + 1, 3), 9))]['qual']['Set1']
        cols = [palette[n % len(palette)] for n in range(N)]
    else:
        showleg = False
        cols = ['blue']

    # scale markersize by the number of points that will actually be drawn
    Lxp = min(max(Lx), maxData)
    if Lxp > 5000:
        markersize = 1
    elif Lxp > 2000:
        markersize = 2
    elif Lxp > 1000:
        markersize = 3
    elif Lxp > 200:
        markersize = 4
    elif Lxp > 80:
        markersize = 5
    elif Lxp > 25:
        markersize = 7
    else:
        markersize = 9

    traces = []

    ## Histogram
    if hist:
        counts = []
        edges = None
        for col in series:
            hy_col, edges = np.histogram(col,
                                         bins=numbins,
                                         density=normHist,
                                         range=[-np.pi, np.pi])
            counts.append(hy_col)
        hy = np.array(counts)

        # Plot at bin centres so angles and radii have the same length (the
        # edges array is one element longer than the counts).
        centers = (edges[:-1] + edges[1:]) / 2

        # add first element to last to complete the circle
        theta_deg = np.append(centers, centers[0]) / np.pi * 180
        hy = np.hstack((hy, hy[:, 0:1]))

        # t=theta, r=radius
        # NOTE(review): `t`/`r`/hoverinfo 'r+t' on go.Scatter are plotly's
        # legacy polar attributes - confirm against the installed plotly version.
        traces += [
            go.Scatter(t=theta_deg,
                       r=hy[n],
                       name=names[n],
                       mode='lines',
                       line={'width': 3, 'color': cols[n]},
                       hovertext=names[n],
                       hoverinfo='name+r+t') for n in range(N)
        ]
        top = np.max(hy) * 1.2
    else:
        top = 1

    ## Scatter
    if scatter and N == 1:
        jitter = .05

        # if data too large only plot a subset
        if Lx[0] > maxData:
            Np = maxData
            dataToPlot = np.random.choice(series[0], Np, replace=False)
        else:
            dataToPlot, Np = series[0], Lx[0]

        # The (sub)sample is what gets plotted, so radii and angles match in
        # length; previously the full data was used for the angles.
        traces += [
            go.Scatter(r=top + np.random.normal(size=Np) * top * jitter,
                       t=dataToPlot / np.pi * 180,
                       mode='markers',
                       name=names[0] + ' scatter',
                       marker={'size': markersize, 'color': cols[0]})
        ]

    ## make fig
    layout = go.Layout(title=title, showlegend=showleg)
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
def plotHist2D(
        x,  # 1D vector
        y,  # 1D vector
        bins=(15, 30),  # # of bins in histogram (x, y)
        xlbl='',
        ylbl='',
        title='',
        log=False,  # whether to log the histogram counts
        mean=False,  # whether to overlay the mean of y per x-bin onto the heatmap
        plot=True):
    """
    Plots 2D heatmap. Does the binning in np as its faster than plotly 2D hist.

    :param x: 1D vector of x samples
    :param y: 1D vector of y samples
    :param bins: (n_x_bins, n_y_bins)
    :param xlbl: x-axis title
    :param ylbl: y-axis title
    :param title: plot title
    :param log: show log10 of the counts (empty bins become NaN)
    :param mean: overlay a trendline of the count-weighted mean of y within
        each x-bin
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    x = np.array(x)
    y = np.array(y)
    maxstd = 8  # if max above this many stddevs from mean, it is clipped
    percclip = [5, 95]  # percentiles the axis is clipped to in that case

    def _axis_bins(vals, n_bins):
        """Return explicit percentile-clipped edges if vals has extreme outliers, else n_bins."""
        centre, spread = np.mean(vals), np.std(vals)
        has_outliers = (centre + maxstd * spread < np.max(vals)
                        or centre - maxstd * spread > np.min(vals))
        if not has_outliers:
            return n_bins
        lo, hi = np.percentile(vals, percclip)
        # n_bins + 1 edges give n_bins bins (n_bins edges were one bin short)
        return np.linspace(lo, hi, n_bins + 1)

    xbins = _axis_bins(x, bins[0])
    ybins = _axis_bins(y, bins[1])

    # `normed` was dropped from np.histogram2d; un-normalised counts are the default
    counts, xedges, yedges = np.histogram2d(x, y, bins=[xbins, ybins])
    counts = counts.T  # extremely important!!!!! rows must be y for the heatmap

    if log:
        H = counts.copy()
        H[H == 0] = np.nan
        H = np.log10(H)
        zlbl = 'log(Count)'
    else:
        H = counts
        zlbl = 'Count'

    hist = go.Heatmap(
        x=xedges,  # bin edges along the x-axis
        y=yedges,  # bin edges along the y-axis
        z=H,
        name='Heatmap',
        showlegend=True,
        zsmooth='best',  # (!) apply smoothing to contours
        colorscale='Portland',  # choose a pre-defined color scale
        colorbar=go.ColorBar(
            titleside='right',  # put title right of colorbar
            ticks='outside',  # put ticks outside colorbar
            title=zlbl,
        ))

    plots = [hist]

    # plotting trendline
    if mean:
        # Always weight by the raw counts: with log=True the displayed matrix
        # holds log10 values, which are not valid weights.
        col_totals = counts.sum(axis=0)
        with np.errstate(divide='ignore', invalid='ignore'):
            p_y_given_x = counts / col_totals  # empty x-bins become NaN (gap in line)
        x_centers = (xedges[:-1] + xedges[1:]) / 2
        y_centers = (yedges[:-1] + yedges[1:]) / 2
        mean_y = y_centers @ p_y_given_x  # one value per x-bin
        meanLine = [
            go.Scatter(x=x_centers,
                       y=mean_y,
                       name='Trendline',
                       showlegend=True)
        ]
        plots = meanLine + plots

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        showlegend=True,
    )

    fig = go.Figure(data=plots, layout=layout)
    return plotOut(fig, plot)
def plotMultiROC(
        y_true,  # list of true labels
        y_scores,  # array of scores for each class of shape [n_samples, n_classes]
        title='Multiclass ROC Plot',
        n_points=100,  # reinterpolates to have exactly N points
        labels=None,  # list of labels for each class
        threshdot=None,  # score threshold at which to draw a marker (None = no marker)
        return_auc=False,
        plot=True,  # forwarded to plotOut
):
    """
    Makes a multiclass ROC plot. Can also be used for binary ROC plot.

    :param y_true: integer class label per sample; class i is compared against
        column i of y_scores
    :param y_scores: [n_samples, n_classes] scores, or a 1D vector for a single
        column
    :param title: plot title
    :param n_points: if not None, each curve is resampled at this many evenly
        spaced TPR values to keep the figure light
    :param labels: legend label per class. Ignored with a printed warning if
        its length differs from the number of classes
    :param threshdot: if given, a marker is drawn on each curve at the
        threshold closest to this value
    :param return_auc: if True, return `(plot_output, roc_auc)` where roc_auc
        maps class index -> AUC
    :param plot: forwarded unchanged to plotOut
    :return: plotOut(fig, plot), or a tuple with the AUC dict when return_auc
    """
    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:  # convert to [n_samples, n_classes] even if 1 class
        y_scores = np.atleast_2d(y_scores).T
    _, n_classes = y_scores.shape

    if n_classes == 1:
        # needed to avoid inverting when doing binary classification: the only
        # column is scored against `y_true == 0`, so scores and the requested
        # threshold are both negated
        y_scores = -1 * y_scores
        if threshdot is not None:
            threshdot = -1 * threshdot

    # calc ROC curves & AUC
    fpr = dict()
    tpr = dict()
    thresh = dict()
    thresh_txt = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], thresh[i] = sk.metrics.roc_curve(
            y_true == i, y_scores[:, i])
        # AUC is taken from the full-resolution curve, before any resampling
        roc_auc[i] = sk.metrics.auc(fpr[i], tpr[i])

        if n_points is not None:
            grid = np.linspace(0, 1, n_points)
            # clip guards against an index one past the end of the curve
            indxs = np.clip(np.searchsorted(tpr[i], grid), 0, tpr[i].size - 1)
            tpr[i] = tpr[i][indxs]
            fpr[i] = fpr[i][indxs]
            thresh[i] = thresh[i][indxs]

        # Add endpoints so every curve runs from (0, 0) to (1, 1)
        tpr[i] = np.concatenate(([0], tpr[i], [1]))
        fpr[i] = np.concatenate(([0], fpr[i], [1]))
        thresh[i] = np.concatenate(([np.inf], thresh[i], [-np.inf]))
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    # The None check must come first: len(None) used to raise for the default.
    if labels is not None and len(labels) != n_classes:
        print(
            f'Warning: have {len(labels)} lables, and {n_classes} classes. Disregarding labels'
        )
        labels = None

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes + 1)]

    labels = [str(x) for x in labels]  # convert labels to str

    # make traces
    traces = [
        go.Scatter(y=tpr[i],
                   x=fpr[i],
                   name=labels[i] + '. AUC= %.2f' % (roc_auc[i]),
                   text=thresh_txt[i],
                   legendgroup=str(i),
                   line={'width': 1})
        for i in range(n_classes)
    ]
    traces += [
        go.Scatter(y=[0, 1],
                   x=[0, 1],
                   name='Random classifier',
                   line={'width': 1, 'dash': 'dot'})
    ]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i] - threshdot)).argmin()
            traces += [
                go.Scatter(x=[fpr[i][c_indx]] * 2,
                           y=[tpr[i][c_indx]] * 2,
                           mode='markers',
                           name='Threshold',
                           legendgroup=str(i),
                           showlegend=False)
            ]

    # make layout
    layout = go.Layout(
        title=title,
        xaxis={'title': 'FPR'},
        yaxis={'title': 'TPR'},
        legend=dict(x=1),
        hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)

    out = plotOut(fig, plot)
    if return_auc:
        # Previously a stray trailing comma returned a 1-tuple with no AUCs.
        return out, roc_auc
    return out
def plotMultiPR(
        y_true,  # list of true labels
        y_scores,  # array of scores for each class of shape [n_samples, n_classes]
        title='Multiclass PR Plot',
        n_points=100,  # reinterpolates to have exactly N points
        labels=None,  # list of labels for each class
        threshdot=None,  # score threshold at which to draw a marker (None = no marker)
        plot=True,  # forwarded to plotOut
):
    """
    Makes a multiclass PR plot (recall on x, precision on y).

    :param y_true: integer class label per sample; class i is compared against
        column i of y_scores
    :param y_scores: [n_samples, n_classes] scores, or a 1D vector for a single
        column
    :param title: plot title
    :param n_points: if not None, each curve is resampled at this many evenly
        spaced recall values to keep the figure light
    :param labels: legend label per class. Defaults to 'C1', 'C2', ...
    :param threshdot: if given, a marker is drawn on each curve at the
        threshold closest to this value
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    y_true = np.array(y_true)
    y_scores = np.array(y_scores)
    if y_scores.ndim == 1:  # convert to [n_samples, n_classes] even if 1 class
        y_scores = np.atleast_2d(y_scores).T
    _, n_classes = y_scores.shape

    if n_classes == 1:
        # needed to avoid inverting when doing binary classification: the only
        # column is scored against `y_true == 0`, so scores and the requested
        # threshold are both negated (as plotMultiROC does)
        y_scores = -1 * y_scores
        if threshdot is not None:
            threshdot = -1 * threshdot

    # calc PR curves & area under them
    precision = dict()
    recall = dict()
    pr_auc = dict()
    thresh = dict()
    thresh_txt = dict()
    for i in range(n_classes):
        precision[i], recall[i], thresh[i] = sk.metrics.precision_recall_curve(
            y_true == i, y_scores[:, i])

        # Step-wise area (average precision): sum of (R_n - R_{n-1}) * P_n,
        # computed on the full-resolution curve. recall is returned in
        # decreasing order, hence the leading minus.
        pr_auc[i] = -np.sum(np.diff(recall[i]) * precision[i][:-1])

        if n_points is not None:
            # Resample on recall, which is monotonic (precision is not, so
            # searchsorted on it was invalid and could index out of bounds).
            # searchsorted needs ascending input, so search the reversed
            # recall and map the hits back to original positions.
            n_raw = recall[i].size
            grid = np.linspace(1, 0, n_points)
            rev_idx = np.clip(np.searchsorted(recall[i][::-1], grid), 0, n_raw - 1)
            indxs = (n_raw - 1) - rev_idx
            precision[i] = precision[i][indxs]
            recall[i] = recall[i][indxs]
            # thresholds are one element shorter than precision/recall
            thresh[i] = thresh[i][np.clip(indxs, 0, thresh[i].size - 1)]

        # Add endpoints so every curve spans the full recall range
        precision[i] = np.concatenate(([0], precision[i], [1]))
        recall[i] = np.concatenate(([1], recall[i], [0]))
        thresh[i] = np.concatenate(([-np.inf], thresh[i], [np.inf]))
        thresh_txt[i] = ['T=%.4f' % t for t in thresh[i]]

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes + 1)]
    labels = [str(x) for x in labels]  # convert to str

    # make traces
    traces = [
        go.Scatter(y=precision[i],
                   x=recall[i],
                   name=labels[i] + '. AUC= %.2f' % (pr_auc[i]),
                   text=thresh_txt[i],
                   legendgroup=str(i),
                   line={'width': 1})
        for i in range(n_classes)
    ]

    if threshdot is not None:
        for i in range(n_classes):
            c_indx = (np.abs(thresh[i] - threshdot)).argmin()
            traces += [
                go.Scatter(x=[recall[i][c_indx]] * 2,
                           y=[precision[i][c_indx]] * 2,
                           mode='markers',
                           name='Threshold',
                           legendgroup=str(i),
                           showlegend=False)
            ]

    # make layout
    layout = go.Layout(
        title=title,
        yaxis={
            'title': 'Precision = P(y=1 | yp=1)',
            'range': [0, 1]
        },
        xaxis={
            'title': 'Recall = TPR = P(yp=1 | y=1)',
            'range': [0, 1]
        },
        legend=dict(x=1),
        hovermode='closest',
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def plot2Hists(
        x1,  # data of 1st histogram
        x2,  # data of 2nd histogram
        names=('A', 'B'),  # legend names of x1, x2 (ex: ['A','B'])
        maxData=500,  # max # of points to plot above histogram (if too high, it will be slow)
        normHist=True,  # 1/0. if 1, norms the histogram to a PDF
        samebins=True,  # whether both hists should have same edges
        numbins=40,  # # bins in histogram
        title='Data Distribution',  # title of plot
        rm_outliers=False,  # 1/0 whether to remove outliers or not
        KS=False,  # whether to do 2 sample KS test for different distributions
        MW=False,  # whether to display the Mann-Whitney/Ranksum test for difference of distributions in title
        T=False,  # as MW, but for ttest
        alt='two-sided',  # one-sided or two-sided hypothesis testing. See scipy for options
        bp=True,  # whether to add boxplot above histograms
        plot=True):  # forwarded to plotOut
    """
    Plots two 1D histograms using plotly.
    Does the binning w/ numpy to make it go way faster than plotly's inherent histogram function

    :param x1: samples of the first distribution (NaNs are dropped)
    :param x2: samples of the second distribution (NaNs are dropped)
    :param names: two legend names
    :param maxData: at most this many raw points per sample are scattered above
        the histograms
    :param normHist: passed to np.histogram as `density`
    :param samebins: use one shared set of bin edges over the joint range
    :param numbins: number of histogram bins
    :param title: plot title; requested p-values are appended to it
    :param rm_outliers: forwarded to removeOutliers as `rmv`
    :param KS: append the 2-sample KS test p-value to the title
    :param MW: append the Mann-Whitney test p-value to the title
    :param T: append the t-test p-value to the title
    :param alt: `alternative` for the Mann-Whitney test
    :param bp: add a horizontal box plot per sample above the histograms
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    x1 = np.array(x1)
    x2 = np.array(x2)

    # Remove NaNs, then count: the counts size the jitter vectors below and
    # must match the data that is actually plotted.
    x1 = x1[~np.isnan(x1)]
    x2 = x2[~np.isnan(x2)]
    N1, N2 = len(x1), len(x2)

    # remove outliers & get basic stats
    _, _, _, rng1, stats1 = removeOutliers(
        x1, stdbnd=6, percclip=[5, 95], rmv=rm_outliers)
    _, _, _, rng2, stats2 = removeOutliers(
        x2, stdbnd=6, percclip=[5, 95], rmv=rm_outliers)

    if samebins:
        jointrng = [min(rng1[0], rng2[0]), max(rng1[1], rng2[1])]
        # numbins + 1 edges give numbins bins, matching the non-shared path
        bins = np.linspace(jointrng[0], jointrng[1], numbins + 1)
    else:
        bins = numbins

    hy1, hx1 = np.histogram(x1, bins=bins, density=normHist, range=rng1)
    hy2, hx2 = np.histogram(x2, bins=bins, density=normHist, range=rng2)

    # Bars are drawn at bin centres: the edge arrays are one element longer
    # than the counts.
    centers1 = (hx1[:-1] + hx1[1:]) / 2
    centers2 = (hx2[:-1] + hx2[1:]) / 2

    top = np.max(np.hstack((hy1, hy2))) * 1.1

    # hist plots
    traces = []
    hist1 = go.Bar(x=centers1,
                   y=hy1,
                   name=names[0],
                   legendgroup=names[0],
                   opacity=.5,
                   marker=dict(color='red', line=dict(color='black', width=2)))
    hist2 = go.Bar(x=centers2,
                   y=hy2,
                   name=names[1],
                   legendgroup=names[1],
                   opacity=.5,
                   marker=dict(color='blue', line=dict(color='black', width=2)))
    traces += [hist1, hist2]

    # data plots
    if N1 > maxData:  # if data too large only plot a subset
        Np = maxData
        dataToPlot = np.random.choice(x1, Np, replace=False)
    else:
        dataToPlot, Np = x1, N1
    dataPlot1 = go.Scatter(x=dataToPlot,
                           y=top * 1.2 + np.random.normal(size=Np) * top * .03,
                           mode='markers',
                           marker=dict(size=2, color='red'),
                           hoverinfo='x+name',
                           name=names[0],
                           legendgroup=names[0],
                           showlegend=False)

    if N2 > maxData:  # if data too large only plot a subset
        Np = maxData
        dataToPlot = np.random.choice(x2, Np, replace=False)
    else:
        dataToPlot, Np = x2, N2  # was N1: wrong length whenever N1 != N2
    dataPlot2 = go.Scatter(x=dataToPlot,
                           y=top + np.random.normal(size=Np) * top * .03,
                           mode='markers',
                           marker=dict(size=2, color='blue'),
                           hoverinfo='x+name',
                           name=names[1],
                           legendgroup=names[1],
                           showlegend=False)
    traces += [dataPlot1, dataPlot2]

    # Boxplots
    if bp:
        bp1 = boxPlot(stats1['med'],
                      np.percentile(x1, [25, 75]),
                      rng1,
                      mean=stats1['mean'],
                      name=names[0],
                      horiz=True,
                      offset=top * 1.3,
                      legendGroup=names[0],
                      plot=False,
                      col='red')
        bp2 = boxPlot(stats2['med'],
                      np.percentile(x2, [25, 75]),
                      rng2,
                      mean=stats2['mean'],
                      name=names[1],
                      horiz=True,
                      offset=top * 1.1,
                      legendGroup=names[1],
                      plot=False,
                      col='blue')
        traces = traces + bp1 + bp2

    # Stat testing
    if MW:
        stat, p_MW = sp.stats.mannwhitneyu(x1, x2, alternative=alt)
        title += ' P_MW=%.3f' % (p_MW)
    if T:
        stat, p_T = sp.stats.ttest_ind(x1,
                                       x2,
                                       equal_var=True,
                                       nan_policy='omit')
        title += ' P_T=%.3f' % (p_T)
    if KS:
        stat, p_KS = sp.stats.ks_2samp(x1, x2)
        title += ' P_KS=%.3f' % (p_KS)

    # Visible x-range: joint min to joint MAX (the upper bound used min),
    # padded outwards by 10% of each bound's magnitude so negative bounds are
    # widened rather than pulled towards zero.
    range_lo = min(rng1[0], rng2[0])
    range_hi = max(rng1[1], rng2[1])
    plotrng = [range_lo - abs(range_lo) * .1, range_hi + abs(range_hi) * .1]

    ylbl = 'Density' if normHist else 'Count'
    fig = go.Figure(data=traces,
                    layout={
                        'title': title,
                        'yaxis': {
                            'title': ylbl
                        },
                        'xaxis': {
                            'range': plotrng
                        },
                        'barmode': 'overlay',
                        'bargap': 0,
                        'hovermode': 'closest',
                    })

    return plotOut(fig, plot)
def multiLine(
        data,  # [N,Lx] numpy array or list, where rows are each line
        x=None,  # optional x-data
        z=None,  # optional z (color) data
        txt=None,  # optional txt over points
        lines=True,  # 1/0 whether we want to plot each of the individual lines
        mean=False,  # True/False where want mean+std line
        names=None,  # names of each data list
        plot=True,  # forwarded to plotOut
        title='',  # title of plot
        ylbl='',  # y-axis title
        xlbl='',  # x-axis title
        norm=None):  # input to norm_mat function if want to norm the data
    """
    Plots bunch of lines + mean in plotly

    Ex: psp.multiLine(data, x=x, names=[], xlbl='', ylbl='', title='')

    :param data: rows are individual lines
    :param x: optional x-data
    :param z: optional colour data; only supported for a single line
    :param txt: optional hover text for the points
    :param lines: draw the individual lines
    :param mean: additionally draw the mean line with a +-1 std band (only when
        all lines share one x vector)
    :param names: name per line
    :param plot: forwarded unchanged to plotOut
    :param title: plot title
    :param ylbl: y-axis title
    :param xlbl: x-axis title
    :param norm: if not None, passed to norm_mat as `method`
    :return: whatever plotOut(fig, plot) returns
    """
    # _massageData returns the inputs in normalised form plus an `info` dict;
    # only the keys read below are relied upon here.
    data, x, z, names, info = _massageData(data, x=x, z=z, names=names)
    N, Lx = info['n_sigs'], info['n_bins']
    uniquex = not info['x_info']['shared']

    if norm is not None:
        data = norm_mat(data, method=norm)

    if info['z_info']['provided']:
        assert N == 1, 'So far coloring only works w/ 1 data series'
        cols = z
        showscale = True
        line_mode = 'lines+markers'
        # NOTE(review): the default text shows the point index, not the z
        # value - confirm this is intended.
        scattertext = ['z=%d' % (i) for i in range(Lx)] if txt is None else txt
    else:
        cols = _getCols(N) if N > 1 else ['blue']
        line_mode = 'lines'
        showscale = False
        scattertext = '' if txt is None else txt

    traces = []
    if lines:
        for i in range(N):
            traces += [
                go.Scatter(
                    y=data[i],
                    # row i when every line has its own x, row 0 when shared
                    x=x[i * uniquex],
                    name=names[i],
                    line={'width': 1},
                    mode=line_mode,
                    text=scattertext,
                    marker={
                        'size': 2,
                        'color': cols[i],
                        'showscale': showscale,
                        'colorscale': 'Portland'
                    })
            ]

    if mean and not uniquex:
        mean_vals = np.mean(data, axis=0)
        std_vals = np.std(data, axis=0)
        plotmean = go.Scatter(x=x[0],
                              y=mean_vals,
                              name='Mean',
                              legendgroup='mean',
                              line={'width': 6})
        ploterror_top = go.Scatter(
            x=x[0],
            y=mean_vals + std_vals,
            fill='none',
            fillcolor='rgba(0,100,80,0.2)',
            mode='lines',
            marker=dict(color='rgba(20,100,80,0)'),
            line=dict(width=0),
            showlegend=False,
            legendgroup='mean',
            name='upper bound',
            opacity=.7,
        )
        # 'tonexty' fills up to the previous trace, i.e. the upper bound
        ploterror_bottom = go.Scatter(
            x=x[0],
            y=mean_vals - std_vals,
            fill='tonexty',
            fillcolor='rgba(0,100,80,0.2)',
            mode='lines',
            marker=dict(color="#444"),  # was "444", which is not a valid colour string
            line=dict(width=0),
            showlegend=False,
            legendgroup='mean',
            name='lower bound',
            opacity=.7,
        )
        traces = [plotmean, ploterror_top, ploterror_bottom] + traces

    # String x-values get explicit, larger tick labels
    if info['x_info']['provided'] and isinstance(x[0][0], str):
        xaxis = {
            'title': xlbl,
            'showgrid': True,
            'showticklabels': True,
            'tickvals': x[0],
            'tickfont': dict(size=18)
        }
    else:
        xaxis = {'title': xlbl}

    layout = go.Layout(
        title=title,
        xaxis=xaxis,
        yaxis={'title': ylbl},
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def propBarPlot(
        data,  # list of 1D data vectors
        names=None,  # names of data vectors
        title=' ',  # title of plot
        ylbl='Proportion',  # y-label
        plot=True):
    """
    Makes a custom plotly proportion barplot: one bar per data vector at its
    mean, with a binomial standard-error bar sqrt(p * (1 - p) / n).

    :param data: list of 1D vectors (lengths may differ)
    :param names: tick label per vector. Defaults to '1', '2', ...
    :param title: plot title
    :param ylbl: y-axis title
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    # Keep the vectors as a list of arrays: np.array on vectors of unequal
    # length builds a ragged array, which recent NumPy rejects.
    data = [np.asarray(col) for col in data]
    N = len(data)
    Lx = [len(col) for col in data]

    if names is None:
        names = [str(i + 1) for i in range(N)]

    # Palette key is clamped to 3..9 and colours are cycled, so more than 9
    # vectors no longer raise KeyError; 1..9 vectors get the same colours as before.
    palette = cl.scales[str(min(max(N, 3), 9))]['qual']['Set1']
    cols = [palette[n % len(palette)] for n in range(N)]

    means = [np.mean(col) for col in data]
    std = [(means[n] * (1 - means[n]) / Lx[n])**.5 for n in range(N)]

    traces = [
        go.Bar(x=list(range(N)),
               y=means,
               marker=dict(color=cols),
               text=['N = %d' % (l) for l in Lx],
               name='BAR',
               error_y=dict(type='data', array=std, visible=True),
               showlegend=False)
    ]

    # go.layout.XAxis (as barPlot uses) instead of the legacy go.XAxis alias
    xaxis = go.layout.XAxis(
        showgrid=True,
        showline=True,
        ticks="",
        showticklabels=True,
        linewidth=2,
        ticktext=names,
        tickvals=list(range(N)),
        tickfont=dict(size=18))

    layout = go.Layout(
        title=title,
        xaxis=xaxis,
        yaxis={'title': ylbl},
        bargap=.5,
        hovermode='closest',
        showlegend=False,
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def barPlot(
        data,  # list of 1D data vectors
        names=None,  # names of data vectors
        maxData=500,  # max # of points to plot above histogram (if too high, it will be slow)
        title=' ',  # title of plot
        ylbl='Mean',  # y-label
        bar=True,  # 1/0. If 0, makes boxplot instead of barplot
        stats=(),  # which stat tests to run, including [ttest, MW, ANOVA, KW] (kruskal-wallis)
        plot=True):  # forwarded to plotOut
    """
    Makes a custom plotly barplot w/ data on side

    Ex: barPlot(data, names, title='Plot Title', ylbl='Metric')

    :param data: list of 1D vectors (lengths may differ); NaNs are removed
    :param names: tick label per vector. Defaults to '1', '2', ...
    :param maxData: at most this many raw points per vector are scattered next
        to its bar
    :param title: plot title; p-values of the requested tests are appended
    :param ylbl: y-axis title
    :param bar: True for bars at the mean with std error bars, False for box
        plots
    :param stats: names of tests to run. 'MW' and 'ttest' are computed when
        there are exactly two vectors; 'ANOVA' and 'KW' only print a notice
    :param plot: forwarded unchanged to plotOut
    :return: whatever plotOut(fig, plot) returns
    """
    # TODO: add outlier removal

    # Keep the vectors as a list of arrays (np.array on vectors of unequal
    # length builds a ragged array, which recent NumPy rejects) & remove NaNs
    data = [removeNaN(np.asarray(col)) for col in data]
    N = len(data)
    Lx = [len(col) for col in data]

    if names is None:
        names = [str(i + 1) for i in range(N)]

    if N < 3:
        cols = cl.scales[str(3)]['qual']['Set1'][0:N]
    elif N <= 12:
        cols = cl.scales[str(N)]['qual']['Set3']
    else:
        cols = [None] * N  # let plotly pick colours

    jitter = .03

    means = [np.mean(col) for col in data]
    meds = [np.median(col) for col in data]
    std = [np.std(col) for col in data]

    # Largest mean + std over all vectors. `means + std` on two Python lists
    # concatenates them, so the sum has to be taken elementwise.
    upper = np.max(np.add(means, std))

    traces = []
    if bar:
        traces += [
            go.Bar(x=list(range(N)),
                   y=means,
                   marker=dict(color=cols),
                   text=['median= %.4f' % (m) for m in meds],
                   name='BAR',
                   error_y=dict(type='data', array=std, visible=True),
                   showlegend=False)
        ]
    else:
        boxwidth = 50
        quartiles = np.array(
            [np.percentile(data[n], [25, 75]) for n in range(N)])
        minmax = np.array([np.percentile(data[n], [5, 95]) for n in range(N)])
        boxs = [
            boxPlot(meds[n],
                    quartiles[n],
                    minmax[n],
                    mean=means[n],
                    outliers=None,
                    name=names[n],
                    horiz=0,
                    offset=n,
                    legendGroup='boxplot',
                    showleg=False,
                    plot=False,
                    col=cols[n],
                    width=boxwidth) for n in range(N)
        ]
        # boxPlot returns a list of traces per box; flatten them
        for box in boxs:
            traces += box

    # scale markersize
    Lxp = np.max(Lx)
    if Lxp > 5000:
        markersize = 1
    elif Lxp > 2000:
        markersize = 2
    elif Lxp > 1000:
        markersize = 3
    elif Lxp > 200:
        markersize = 4
    elif Lxp > 80:
        markersize = 5
    else:
        markersize = 7

    # reduce length of data for plotting
    data_to_plot = [
        np.random.choice(col, maxData, replace=False)
        if len(col) > maxData else col for col in data
    ]

    # raw points are drawn half a unit to the right of each bar, with jitter
    dataPlot = [
        go.Scatter(x=i + .5 +
                   np.random.normal(size=len(data_to_plot[i])) * jitter,
                   y=data_to_plot[i],
                   mode='markers',
                   marker=dict(size=markersize, color=cols[i]),
                   name=names[i]) for i in range(N)
    ]
    traces += dataPlot

    xaxis = go.layout.XAxis(
        showgrid=True,
        showline=True,
        ticks="",
        showticklabels=True,
        linewidth=2,
        ticktext=names,
        tickvals=list(range(N)),
        tickfont=dict(size=18))

    # if data has huge outliers, manually bring axes closer to look better.
    # Cast to a plain bool so plotly receives a native Python value.
    auto_rng = bool(
        np.max([np.max(col) for col in data_to_plot]) < 2 * upper)

    # stats
    statvals = []
    if 'MW' in stats and N == 2:
        try:
            stat, pval = sp.stats.mannwhitneyu(data[0],
                                               data[1],
                                               alternative='two-sided')
            statvals += [['MW', pval]]
        except Exception:
            # best effort: the plot is still produced without the MW p-value
            print('Could not process MW stats')
    if 'ttest' in stats and N == 2:
        stat, pval = sp.stats.ttest_ind(data[0], data[1])
        statvals += [['T-test', pval]]
    if 'ANOVA' in stats:
        print('ANOVA not yet implemented')
    if 'KW' in stats:
        print('Kruskal–Wallis test not yet implemented')
    if len(statvals) > 0:
        stat_str = '. '.join(['P(%s)=%.3f' % (x[0], x[1]) for x in statvals])
        title = title + '. ' + stat_str

    layout = go.Layout(
        title=title,
        xaxis=xaxis,
        yaxis={
            'title': ylbl,
            'range': [0, upper * 2],
            'autorange': auto_rng
        },
        bargap=.5,
        hovermode='closest',
        showlegend=False,
    )

    fig = go.Figure(data=traces, layout=layout)
    return plotOut(fig, plot)
def scatterHistoPlot(x,
                     y,
                     title='2D Density Plot',
                     xlbl='',
                     ylbl='',
                     plot=True):
    """
    Draws the x/y points as a scatter plot on top of a 2D density contour plot, with the
    marginal histograms of x and y placed along the top and right edges of the figure.

    :param x: x-values of the points
    :param y: y-values of the points
    :param title: title of plot
    :param xlbl: x-axis label
    :param ylbl: y-axis label
    :param plot: passed through to plotOut
    :return: the result of plotOut(fig, plot)
    """
    dark_red = 'rgb(102,0,0)'
    scatterplot_ratio = .85  # ratio of figure to be taken by scatterplot vs histograms
    main_domain = [0, scatterplot_ratio]
    side_domain = [scatterplot_ratio, 1]

    # main panel: raw points over the density contours
    points = go.Scatter(
        x=x,
        y=y,
        mode='markers',
        name='points',
        marker={'color': dark_red, 'size': 2, 'opacity': 0.4},
    )
    density = go.Histogram2dcontour(
        x=x,
        y=y,
        name='density',
        ncontours=20,
        colorscale='Hot',
        reversescale=True,
        showscale=False,
    )

    # marginal histograms, drawn on the secondary axes
    hist_x = go.Histogram(x=x, name='x density', marker={'color': dark_red}, yaxis='y2')
    hist_y = go.Histogram(y=y, name='y density', marker={'color': dark_red}, xaxis='x2')

    layout = go.Layout(
        title=title,
        showlegend=False,
        autosize=False,
        width=600,
        height=550,
        xaxis={'title': xlbl, 'domain': main_domain, 'showgrid': False, 'zeroline': False},
        yaxis={'title': ylbl, 'domain': main_domain, 'showgrid': False, 'zeroline': False},
        margin={'t': 50},
        hovermode='closest',
        bargap=0,
        xaxis2={'domain': side_domain, 'showgrid': False, 'zeroline': False},
        yaxis2={'domain': side_domain, 'showgrid': False, 'zeroline': False},
    )

    fig = go.Figure(data=[points, density, hist_x, hist_y], layout=layout)
    return plotOut(fig, plot)
def multiMean(data,
              x=None,
              std=True,
              names=None,
              plot=True,
              title='',
              ylbl='',
              xlbl='',
              norm=None,
              indiv=False,
              indivnames=None):
    """
    Plots means of multiple data matrices

    :param data: list of data matrices. Each matrix is [n_lines, Lx]; the mean is taken over
                 the lines (axis 0). All matrices must share the same Lx
    :param x: optional x-data (defaults to 0..Lx-1)
    :param std: 1/0. If 1 plots shaded std deviation around mean
    :param names: names of data (defaults to '#0', '#1', ...)
    :param plot: passed through to plotOut (if false, just returns plotly json object)
    :param title: title of plot
    :param ylbl: y-axis label
    :param xlbl: x-axis label
    :param norm: input to norm_mat function if want to norm the data
    :param indiv: 1/0 whether we want to plot each of the individual lines
    :param indivnames: names of individual line traces (used as hover text)
    :return: the result of plotOut(fig, plot)
    :raises ValueError: if the data matrices do not all have the same length
    """
    data = [np.atleast_2d(np.array(d)) for d in data]
    N = len(data)
    Ncol, Lx = zip(*[d.shape for d in data])
    if len(np.unique(Lx)) != 1:
        raise ValueError('Input data sources must be of the same length (Lx)')
    Lx = Lx[0]

    if norm is not None:
        data = [norm_mat(d, method=norm) for d in data]
    if names is None:
        names = ['#%d' % (i) for i in range(N)]
    if x is None:
        x = np.arange(Lx)
    x = np.atleast_2d(x)

    traces = []
    cols = cl.scales[str(max(3, N))]['qual']['Set1']
    # same colors w/ alpha=.2, used for the shaded std band
    tcols = ['rgba' + c[3:-1] + ',.2)' for c in cols]

    for n in range(N):
        mean = np.mean(data[n], axis=0)
        traces.append(
            go.Scatter(x=x[0],
                       y=mean,
                       name=names[n],
                       legendgroup=names[n],
                       line={
                           'width': 4,
                           'color': cols[n]
                       }))

        if std:
            # NB: kept under its own name so the `std` flag is not overwritten
            sdev = np.std(data[n], axis=0)
            band_style = dict(
                fillcolor=tcols[n],
                mode='lines',
                marker=dict(color=tcols[n]),
                line=dict(width=0),
                showlegend=False,
                legendgroup=names[n],
                opacity=.7,
            )
            ploterror_top = go.Scatter(x=x[0],
                                       y=mean + sdev,
                                       fill='none',
                                       name=names[n] + ' UB',
                                       **band_style)
            # 'tonexty' fills up to the previous trace, so this must directly follow the UB
            ploterror_bottom = go.Scatter(x=x[0],
                                          y=mean - sdev,
                                          fill='tonexty',
                                          name=names[n] + ' LB',
                                          **band_style)
            traces += [ploterror_top, ploterror_bottom]

        if indiv and Ncol[n] > 1:
            inames = [''] * Ncol[n] if indivnames is None else indivnames
            traces += [
                go.Scatter(
                    x=x[0],
                    y=l,
                    showlegend=c == 0,  # one legend entry for the whole group of lines
                    name=names[n] + ' |',
                    legendgroup=names[n] + ' |',
                    hovertext=inames[c],
                    hoverinfo='text',
                    opacity=.3,
                    line={
                        'width': 1,
                        'color': cols[n],
                        'dash': 'dot'
                    }) for c, l in enumerate(data[n])
            ]

    layout = go.Layout(
        title=title,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        hovermode='closest',
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
def plotConfusionMatrix(
        y_true,  # list of true labels
        y_pred,  # list of predicted labels
        conf_matrix=None,  # optional mode to directly provide confusion matrix
        title='Confusion Matrix',  # NOTE: accepted but not applied to the figure
        labels=None,  # list of labels for each class
        binarized=None,  # if int/str then makes 1vsAll confusion matrix of that class
        add_totals=True,  # whether to add an extra row for class totals
        plot=True,  # 1/0. If 0, returns plotly json object, but doesnt plot
        fontsize=18,  # axis font
        norm='rows',  # how to norm matrix colors. either 'all'/'rows'/'columns'
):
    """
    Plots either a full or binarized confusion matrix

    Rows are the true classes, columns the predicted classes. Cell annotations show the raw
    counts; cell shading follows `norm`; hover text shows the row- & column-normalized
    percentages. With add_totals an extra 'TOTAL' row & column is appended and separated
    from the matrix by red lines. The overall accuracy is written into the x-axis title.

    If conf_matrix is given it is used as is (y_true / y_pred are then not needed to build
    it); otherwise it is computed from y_true / y_pred, assuming classes are 0..n_classes-1.

    EX: plotConfusionMatrix(y_true, y_pred, labels)

    :return: the result of plotOut(fig, plot)
    """
    # `is None` (not `== None`): comparing an ndarray w/ == is elementwise
    if conf_matrix is not None:
        conf_matrix = np.asarray(conf_matrix)

    if labels is not None:
        n_classes = len(labels)
    elif conf_matrix is not None:
        n_classes = conf_matrix.shape[0]
    else:
        n_classes = len(np.unique(y_true))

    if labels is None:
        labels = ['C%d' % n for n in range(1, n_classes + 1)]

    if conf_matrix is None:
        conf_matrix = sk.metrics.confusion_matrix(y_true, y_pred, labels=range(n_classes))

    # accuracy is always that of the full (non-binarized) matrix
    acc = np.diag(conf_matrix).sum() / np.sum(conf_matrix) * 100

    if binarized is not None:
        # identify index of 1vsAll category
        if isinstance(binarized, str):
            bin_indx = list(labels).index(binarized)
        else:
            bin_indx = binarized
        # NOTE(review): the first row/col (labelled 'T') collects all classes OTHER than the
        # selected one, and 'F' is the selected class itself - confirm this is intended.
        other_rows = np.delete(conf_matrix, bin_indx, axis=0)
        tp = np.sum(np.delete(other_rows, bin_indx, axis=1))
        fp = np.sum(np.delete(conf_matrix[bin_indx, :], bin_indx))
        fn = np.sum(other_rows[:, bin_indx])
        tn = conf_matrix[bin_indx, bin_indx]
        conf_matrix = np.array([[tp, fn], [fp, tn]])
        labels = ['T', 'F']
        n_classes = 2

    labels = [str(x) for x in labels]  # convert to str
    labels = ['[' + x + ']' if len(x) == 1 else x for x in labels]  # needed for stupid plotly bug

    # adds an extra row for matrix totals
    conf_matrix_tots = np.array(conf_matrix)
    if add_totals:
        pred_tots = np.sum(conf_matrix, 0)
        conf_matrix_tots = np.vstack((conf_matrix, pred_tots))
        true_tots = np.sum(conf_matrix_tots, 1, keepdims=True)
        conf_matrix_tots = np.hstack((conf_matrix_tots, true_tots))
        labels = labels + ['TOTAL']

    # shorten labels
    labels_short = [x[:10] if isinstance(x, str) else x for x in labels]

    # numeric labels
    num_labels = list(range(len(labels)))
    n_cells = len(labels)  # cells per axis: n_classes (+1 if totals are shown)

    def normMatByTotal(mat, axis=0):
        ''' This normalizes a matrix by its row (axis=1) or column (axis=0) totals'''
        axis_sums = np.sum(mat, axis=axis, keepdims=True).astype('float32')
        axis_sums[axis_sums == 0] = np.nan  # this avoids divide by 0.
        mat = np.nan_to_num(mat / axis_sums)
        return mat

    # percentage hover labels
    row_percs = normMatByTotal(conf_matrix, axis=1)
    col_percs = normMatByTotal(conf_matrix, axis=0)

    # normalize matrix
    if norm != 'all':
        norm_conf_matrix = row_percs if norm == 'rows' else col_percs
    else:
        norm_conf_matrix = conf_matrix
    color_mat = conf_matrix_tots.astype(float)
    # explicit bounds so this also works when there is no totals row/col
    color_mat[:n_classes, :n_classes] = norm_conf_matrix

    # hover text
    txt_format = '<b>Pred:</b> %s <br><b>True:</b> %s <br><b>Row norm:</b> %.3f%% <br><b>Col norm:</b> %.3f%%'
    htext = np.array([[
        txt_format % (labels[c], labels[r], row_percs[r, c] * 100, col_percs[r, c] * 100)
        for c in range(n_classes)
    ] for r in range(n_classes)])

    # Adjust Total rows
    if add_totals:
        totals_row_shading = .0  # range 0 to 1. 0=darkest, 1=lightest
        tot_val = np.min(norm_conf_matrix) + (np.max(norm_conf_matrix) -
                                              np.min(norm_conf_matrix)) * totals_row_shading
        color_mat[-1, :] = tot_val
        color_mat[:, -1] = tot_val
        pred_tot_text = np.array([
            '<b>%% of Predictions:</b> %.2f%%' % x for x in pred_tots / np.sum(pred_tots) * 100
        ])
        # per-class true counts (drop the grand total; flatten so each item is a scalar)
        true_counts = true_tots[:-1, 0]
        true_tot_text = np.array(
            [['<b>%% of True Data:</b> %.2f%%' % x]
             for x in true_counts / np.sum(true_counts) * 100] + [['Total Samples']])
        htext = np.hstack((np.vstack((htext, pred_tot_text)), true_tot_text))

    fig = ff.create_annotated_heatmap(color_mat,
                                      x=num_labels,
                                      y=num_labels,
                                      colorscale='Greys',
                                      annotation_text=conf_matrix_tots)

    fig.layout.yaxis.title = 'True'
    fig.layout.xaxis.title = 'Predicted (Total accuracy = %.3f%%)' % acc
    fig.layout.xaxis.titlefont.size = fontsize
    fig.layout.yaxis.titlefont.size = fontsize
    fig.layout.xaxis.tickfont.size = fontsize - 2
    fig.layout.yaxis.tickfont.size = fontsize - 2
    fig.layout.showlegend = False
    # Add label text to axis values
    fig.layout.xaxis.tickmode = 'array'
    fig.layout.xaxis.range = [-.5, n_cells - .5]
    fig.layout.xaxis.tickvals = num_labels
    fig.layout.xaxis.ticktext = labels_short
    fig.data[0].hoverlabel.bgcolor = 'rgb(188,202,225)'

    # fig.layout.yaxis.autorange = 'reversed'
    fig.layout.yaxis.tickmode = 'array'
    fig.layout.yaxis.range = [n_cells - .5, -.5]  # reversed so the 1st class is on top
    fig.layout.yaxis.tickvals = num_labels
    fig.layout.yaxis.ticktext = labels_short
    fig.layout.margin.l = 120  # adjust left margin to avoid ylbl overlaying tick str's

    fig['data'][0]['xgap'] = 1
    fig['data'][0]['ygap'] = 1
    ## Change annotation font (& text)
    for i in range(len(fig.layout.annotations)):
        fig.layout.annotations[i].font.size = fontsize - 3
        #fig.layout.annotations[i].text = str(conf_matrix_tots.flatten()[i])

    # add hover text
    fig.data[0].text = htext
    fig.data[0].hoverinfo = 'text'

    ### Adjust totals fontstyle
    if add_totals:
        # get totals indxs
        # (assumes annotations are ordered row by row over the (n+1)x(n+1) grid - TODO confirm)
        n = n_classes
        last_column_indxs = [(n + 1) * x - 1 for x in range(1, n + 1)]
        last_row_indxs = list(range((n + 1) * (n), (n + 1)**2))
        totals_annot_indxs = last_row_indxs + last_column_indxs
        # adjust totals font size & color
        for i in totals_annot_indxs:
            fig['layout']['annotations'][i]['font'] = dict(size=fontsize, color='#000099')

        # Add border lines for total row/col
        data = list(fig['data'])
        data += [
            go.Scatter(x=[n_classes - .5, n_classes - .5],
                       y=[-.5, n_classes + .5],
                       showlegend=False,
                       hoverinfo='none',
                       line=dict(color='red', width=4, dash='solid'))
        ]
        data += [
            go.Scatter(y=[n_classes - .5, n_classes - .5],
                       x=[-.5, n_classes + .5],
                       showlegend=False,
                       hoverinfo='none',
                       line=dict(color='red', width=4, dash='solid'))
        ]
        fig = go.Figure(data=data, layout=fig['layout'])

    return plotOut(fig, plot)
def plotHist(
        data,  # 1D list/np vector of data
        maxData=1000,  # max # of points to plot above histogram (if too high, it will be slow)
        plot=True,  # 1/0. If 0, returns plotly json object, but doesnt plot
        title='Distribution',  # plot title
        xlbl='',  # plot label
        rm_outliers=False,  # 1/0 whether to remove outliers or not
        density=True,  # whether to plot PDF or count
        boxplot=True,  # 1/0 whether to do upper boxplot
        scatter=True,  # 1/0 add upper scatterplot
        diff_tst=0):  # 1/0. If 1 assumes we checking for a signif difference from 0
    """
    Plots a 1D histogram using plotly.
    Does the binning w/ numpy to make it go way faster than plotly's inherent histogram function

    Above the 40-bin histogram it optionally adds a jittered strip of the raw points
    (`scatter`, subsampled to `maxData`) and a horizontal boxplot (`boxplot`). With
    `diff_tst` a vertical line at x=0 is drawn and the p-values of a 1-sample t-test and a
    Wilcoxon test against 0 are appended to the title.

    Usage:
    x = np.random.normal(0,1,(100))
    plotHist(x, title='Normal Distribution', xlbl='values', diff_tst=1)

    :return: the result of plotOut(fig, plot)
    """
    data = np.array(data)

    # remove NaNs/Infs (isfinite is False for both)
    try:
        data = data[np.isfinite(data)]
    except (TypeError, IndexError):
        print('Failed to do NaN removal')
    # count the points only AFTER cleaning, so it matches what is actually plotted
    N = len(data)

    adj, corr_data, outliers, rng, stats = removeOutliers(data,
                                                          stdbnd=6,
                                                          percclip=[5, 95],
                                                          rmv=rm_outliers)

    hy, hx = np.histogram(data, bins=40, density=density, range=rng)
    # np.histogram returns the 41 bin EDGES; bars belong at the 40 bin centers
    bin_centers = (hx[:-1] + hx[1:]) / 2
    top = np.max(hy) * 1.1
    jitter = .02

    traces = []
    hist = go.Bar(x=bin_centers,
                  y=hy,
                  name='Hist',
                  opacity=.5,
                  marker=dict(color='red', line=dict(color='black', width=2)))
    traces += [hist]

    # if data too large only plot a subset
    if scatter:
        if N > maxData:
            Np = maxData
            dataToPlot = np.random.choice(data, Np, replace=False)
        else:
            dataToPlot, Np = data, N
        # points are drawn in a thin jittered band just above the tallest bar
        dataPlot = go.Scatter(x=dataToPlot,
                              y=top + np.random.normal(size=Np) * top * jitter,
                              name='data',
                              mode='markers',
                              marker=dict(color='black', size=2),
                              hoverinfo='x+name')
        traces += [dataPlot]

    # boxplot
    if boxplot:
        bp = boxPlot(stats['med'],
                     np.percentile(data, [25, 75]),
                     rng,
                     mean=stats['mean'],
                     horiz=True,
                     offset=top * 1.2,
                     plot=False,
                     col='red',
                     showleg=True)
        traces += bp

    if diff_tst:
        vertline = go.Scatter(x=[0, 0],
                              y=[0, top * 1.1],
                              name='x=0',
                              showlegend=1,
                              line=dict(color='black', width=2, dash='dot'))
        traces += [vertline]
        _, Pt = sp.stats.ttest_1samp(data, 0)
        _, Pw = sp.stats.wilcoxon(data)
        title += ' P_t=%.2f. P_w=%.2f' % (Pt, Pw)

    ylbl = 'Probability Density' if density else 'Count'

    fig = go.Figure(data=traces,
                    layout={
                        'title': title,
                        'yaxis': {
                            'title': ylbl
                        },
                        'xaxis': {
                            'title': xlbl,
                            'range': [rng[0] * .9, rng[1] * 1.1]
                        },
                        'bargap': 0,
                        'hovermode': 'closest',
                    })

    return plotOut(fig, plot)
def _as_series_list(v):
    """Returns v (a 1D vector or a collection of 1D vectors) as a list of 1D arrays."""
    if not isinstance(v, np.ndarray):
        try:
            v = np.array(v)
        except ValueError:
            # vectors of different lengths: recent numpy refuses to build a ragged array
            return [np.asarray(s) for s in v]
    if np.issubdtype(v.dtype, np.number):
        return list(np.atleast_2d(v))
    # object array holding one vector per entry
    return [np.asarray(s) for s in v]


def corrPlot(
        x,  # 1D data vector or list of 1D data vectors
        y,  # 1D data vector or list of 1D data vectors
        z=None,  # optional colors for the lines
        names=None,  # names of x, y (ex:['A', 'B']
        maxdata=2000,  # max # of points to plot above histogram (if too high, it will be slow)
        addCorr=True,  # whether to add correlation statistics into plot (R2, spearmanR2, Pvals, & y=mx+b)
        addCorrLine=True,  # whether to plot correlation line
        addXYline=False,  # whether to plot y=x line
        text=None,  # whether to add additional text to each point
        plot=True,  # if false, just returns plotly json object
        title='Correlation',  # title of plot
        xlbl='',  #
        ylbl='',
        markersize=None,  # either None or #. If None, will automatically determine best
):
    """
    Plots x , y data and their trendline using plotly

    Each (x[n], y[n]) pair is one data series, drawn as a scatter (randomly subsampled to
    `maxdata` points). Points where x or y is NaN are dropped. With addCorr the linear
    regression & spearman statistics of each series are computed on all of its points; they
    are shown as an annotation when there is only 1 series, and as hover text of the
    regression line (if addCorrLine). NB: the regression line is only drawn when addCorr
    is set.

    EX: plot diff between two series
    corrPlot(x, y, xlbl='A', ylbl='B', addCorr=False, addCorrLine=False, addXYline=True)

    :return: the result of plotOut(fig, plot)
    :raises ValueError: if x & y do not pair up into vectors of equal length
    """
    #TODO: remove outliers

    # (1) convert to lists of 1D arrays (the series may have different lengths)
    x = _as_series_list(x)
    y = _as_series_list(y)
    N = len(x)
    if len(y) != N or any(len(xs) != len(ys) for xs, ys in zip(x, y)):
        raise ValueError('All x & y vectors must be same length!!!')

    # (2) remove NaNs
    for n in range(N):
        keep = ~(np.isnan(x[n]) | np.isnan(y[n]))
        x[n], y[n] = x[n][keep], y[n][keep]

    # (3) get Lx
    Lx = [len(xs) for xs in x]

    # if data has too many points, remove some for speed
    Iplot = [
        np.arange(Lx[n]) if Lx[n] < maxdata else np.random.choice(
            Lx[n], size=maxdata, replace=False) for n in range(N)
    ]

    if names is None:
        names = ['Line ' + str(i) for i in range(N)]
    if isinstance(names, str):
        names = [names]

    traces = []

    # determine scatterpoint colors
    if z is not None:
        assert N == 1, 'So far coloring only works w/ 1 data series'
        if not isinstance(z, np.ndarray):
            z = np.array(z)
        z = np.atleast_2d(z)
        cols = z
        showleg = False
        showscale = True
        line_col = ['black']
        lg = [None]
        scattertext = ['z=%d' % (i) for i in range(Lx[0])] if text is None else text
    else:
        if N > 1:
            lg = names
            showleg = False
            cols = cl.scales[str(max(3, N))]['qual']['Set1']
        else:
            lg = [None]
            showleg = True
            cols = ['blue']
        line_col = cols
        showscale = False
        if text is None:
            scattertext = ''
        else:
            scattertext = text

    def point_colors(n):
        # per-point colors must be subsampled together w/ the points they belong to
        c = cols[n]
        if z is not None and len(c) == Lx[n]:
            c = c[Iplot[n]]
        return c

    def inset_range(lo, hi, shift=.03):
        # [lo, hi] pulled in by `shift` (fraction of the span) from both edges
        span = hi - lo
        return np.array([lo + span * shift, hi - span * shift])

    # scale markersize
    Lxp = np.min([max(Lx), maxdata])
    if markersize is None:
        if Lxp > 5000:
            markersize = 1
        elif Lxp > 2000:
            markersize = 2
        elif Lxp > 1000:
            markersize = 3
        elif Lxp > 200:
            markersize = 4
        elif Lxp > 80:
            markersize = 5
        elif Lxp > 25:
            markersize = 7
        else:
            markersize = 9

    scatPlot = [
        go.Scatter(x=x[n][Iplot[n]],
                   y=y[n][Iplot[n]],
                   name=names[n],
                   legendgroup=lg[n],
                   mode='markers',
                   opacity=.5,
                   text=scattertext,
                   marker={
                       'size': markersize,
                       'color': point_colors(n),
                       'showscale': showscale,
                       'colorscale': 'Portland'
                   }) for n in range(N)
    ]
    traces += scatPlot

    annots = []
    if addCorr:
        for n in range(N):
            # NOTE(review): linregress/spearmanr return correlation coefficients (r, rho),
            # not their squares, although the text below labels them 'R2' - confirm label.
            slope, intercept, r_val, p_val, _ = sp.stats.linregress(x[n], y[n])
            r_sp, p_val_sp = sp.stats.spearmanr(x[n], y[n])
            corrtext = 'Pearson [R2, P]=[%.2f,%.2f] <br> ' \
                       'Spearman [R2, P]=[%.2f,%.2f] <br> ' \
                       'y=%.2fx+%.2f' \
                       % (r_val, p_val, r_sp, p_val_sp, slope, intercept)
            #if only 1 data record print stats on graph
            if N == 1:
                annots = go.Annotations([
                    go.Annotation(x=0.05,
                                  y=0.95,
                                  showarrow=False,
                                  text=corrtext,
                                  xref='paper',
                                  yref='paper')
                ])

            if addCorrLine:
                # the line of series n spans the x-range of series n
                xc = inset_range(np.min(x[n]), np.max(x[n]))
                yc = slope * xc + intercept
                corrline = [
                    go.Scatter(x=xc,
                               y=yc,
                               name=names[n] + ' corr',
                               legendgroup=lg[n],
                               showlegend=showleg,
                               mode='lines',
                               line={'color': line_col[n]},
                               hovertext=corrtext,
                               hoverinfo='name+text')
                ]
                traces += corrline

    if addXYline:
        # y=x reference line across the x-range of all series
        xc = inset_range(np.min([np.min(xs) for xs in x]), np.max([np.max(xs) for xs in x]))
        xyline = [
            go.Scatter(x=xc,
                       y=xc,
                       name='X=Y',
                       showlegend=True,
                       mode='lines',
                       line={'color': 'black'})
        ]
        traces += xyline

    showleg = N != 1

    layout = go.Layout(
        title=title,
        annotations=annots,
        xaxis={'title': xlbl},
        yaxis={'title': ylbl},
        hovermode='closest',
        showlegend=showleg,
    )
    fig = go.Figure(data=traces, layout=layout)

    return plotOut(fig, plot)
def getSTA(
        trigger,
        signal,
        rng,
        lags=1,
        norm='zscore',  # how each STA trial is normalized in all_sta
        removeOutliers=True,  # 1/0. If 1 remove data +- 6 std devs from mean
        # plotting parameters
        plot=False,
        xtra_times=None,  # plots dots relative to on times.
        Fs=1,
        title='Stimulus Triggered Average'):
    '''
    Computes the stimulus triggered average (STA) of a signal via calcSTA and builds a
    figure of it: a heatmap of the individual trials, the STA itself as a black line
    (rescaled to span the trial axis) and a dashed vertical line at time 0.

    :param trigger: trigger points around which to calc STA. 1D vector of bins (thus ints)
    :param signal: 1D vector
    :param rng: [lb, ub] in bins of how long to get the STA for
    :param lags: int of how much bins to skip (thus if 2, takes every other point in the STA)
    :param norm: forwarded to calcSTA
    :param removeOutliers: forwarded to calcSTA
    :param plot: passed through to plotOut (1/0. If 1, then plots in plotly)
    :param xtra_times: if given, one white dot per trial is drawn at (xtra_times - trigger) / Fs
    :param Fs: sampling frequency of data. Only relevant for plotting
    :param title: title of plot
    :return: (sta, bins, all_sta, fig)
    '''
    # calculate STA
    sta, bins, all_sta = calcSTA(trigger,
                                 signal,
                                 rng,
                                 lags=lags,
                                 norm=norm,
                                 removeOutliers=removeOutliers)

    # generate STA plot
    N = len(trigger)
    times = bins / Fs
    # stretch the STA over the trial axis so it can be overlaid on the heatmap
    sta_min, sta_max = np.min(sta), np.max(sta)
    sta_rescaled = N / (sta_max - sta_min) * (sta - sta_min) + .5

    # good colormaps are Picnic, Rainbow
    traces = [
        go.Heatmap(x=times,
                   y=np.arange(1, len(all_sta) + 1),
                   z=all_sta,
                   colorscale='Rainbow'),
        go.Scatter(x=times,
                   y=sta_rescaled,
                   line=dict(color='black', width=3),
                   name='STA'),
        # dashed vertical line marking the trigger time
        go.Scatter(x=[0, 0],
                   y=[.5, N + .5],
                   showlegend=False,
                   line=dict(color='black', dash='dash', width=1)),
    ]

    if xtra_times is not None:
        traces.append(
            go.Scatter(
                x=(xtra_times - trigger) / Fs,
                y=np.arange(N) + 1,
                name='dots',
                mode='markers',
                marker={'size': 4, 'color': 'white'},
            ))

    layout = dict(
        title=title,
        xaxis=dict(title='Times (s)', range=[-rng[0] / Fs, rng[1] / Fs]),
        yaxis=dict(title='Trial', range=[.5, N + .5]),
    )

    fig = go.Figure(data=traces, layout=layout)

    plotOut(fig, plot)

    return sta, bins, all_sta, fig