def histogram_month(filename, metrics, key, title_str, ylabel):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    # Built for reference but never passed to the figure; the vbar calls below
    # read straight from the per-person frames. (Was hardcoded to
    # 'frame_months' for person A; now uses `key` consistently.)
    data_months = {
        'index': metrics['A'][key].index,
        metrics['A']['name']: metrics['A'][key].frequency,
        metrics['B']['name']: metrics['B'][key].frequency
    }
    fig = bkh.figure(x_axis_type='datetime', title=title_str, width=720, height=480)
    fig.vbar(x='index', top='frequency', width=timedelta(days=10),
             source=metrics['A'][key], color=colors[0], legend=metrics['A']['name'])
    fig.vbar(x='index', top='frequency', width=timedelta(days=10),
             source=metrics['B'][key], color=colors[1], legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Date'
    fig.yaxis.axis_label = ylabel
    bkh.show(fig)
    return
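# Usage sketch for histogram_month (hedged: the surrounding module is assumed
# to import bokeh.plotting as bkh and timedelta, and to define a `colors`
# palette; the per-person frames are assumed to be pandas DataFrames with a
# DatetimeIndex and a `frequency` column, inferred from the function body).
import pandas as pd

_idx = pd.date_range('2020-01-01', periods=6, freq='MS')
example_metrics = {
    'A': {'name': 'Alice',
          'frame_months': pd.DataFrame({'frequency': [12, 30, 25, 40, 18, 22]}, index=_idx)},
    'B': {'name': 'Bob',
          'frame_months': pd.DataFrame({'frequency': [8, 14, 33, 21, 27, 19]}, index=_idx)},
}
histogram_month('months.html', example_metrics, 'frame_months',
                'Monthly message count over time per person', 'Message count')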
def plot_comp_ave(x_axi, wholedata, trainPredict, trainPredict_2, testPredict,
                  startpoint, test_num, ave_len, title='plot', path=None,
                  filename='plot'):
    the_dir = 'plot' if path is None else str(path)
    if not os.path.isdir(the_dir):
        os.mkdir(the_dir)
    output_file(the_dir + '/' + str(filename) + '.html')
    p = figure(title=title, x_axis_label='Time', y_axis_label='Load',
               x_axis_type="datetime", plot_width=1200)
    p.line(x_axi[:-ave_len + 1], wholedata[:, 0])
    test_pred_start = test_num[0] + startpoint
    train_2_start = test_num[1] + startpoint
    if trainPredict.shape[-1] == 1:
        p.line(x_axi[startpoint:startpoint + len(trainPredict)],
               trainPredict[:, 0], color="#B3DE69")
        p.line(x_axi[test_pred_start:test_pred_start + len(testPredict)],
               testPredict[:, 0], color="#CAB2D6")
        p.line(x_axi[train_2_start:train_2_start + len(trainPredict_2)],
               trainPredict_2[:, 0], color="#B3DE69")
    else:
        p.line(x_axi[startpoint:startpoint + len(trainPredict)],
               trainPredict, color="#B3DE69")
        p.line(x_axi[test_pred_start:test_pred_start + len(testPredict)],
               testPredict, color="#CAB2D6")
        p.line(x_axi[train_2_start:train_2_start + len(trainPredict_2)],
               trainPredict_2, color="#B3DE69")
    show(p)
    reset_output()
def variable_correction_plots(station, dt_array, var_one, corr_var_one, var_two,
                              corr_var_two, code, folder_path):
    x_size = 800
    y_size = 350
    reset_output()  # clears bokeh output, prevents ballooning file sizes
    delta_var_one = corr_var_one - var_one
    delta_var_two = corr_var_two - var_two
    with np.errstate(divide='ignore', invalid='ignore'):
        # Silencing all errors when we divide by a nan
        prct_var_one = ((corr_var_one - var_one) / var_one) * 100.0
        prct_var_two = ((corr_var_two - var_two) / var_two) * 100.0
    # Obtain title based on variables passed for file name
    (units, title, var_one_name, var_one_color,
     var_two_name, var_two_color) = generate_line_plot_features(code, '')
    output_file(folder_path + "/correction_files/" + station + "_" + title +
                "_correction_plots.html")
    original_plot = line_plot(x_size, y_size, dt_array, var_one, var_two, code,
                              station + ' Original ', link_plot=None)
    corrected_plot = line_plot(x_size, y_size, dt_array, corr_var_one,
                               corr_var_two, code, 'Corrected ',
                               link_plot=original_plot)
    delta_plot = line_plot(x_size, y_size, dt_array, delta_var_one,
                           delta_var_two, code, 'Deltas of ',
                           link_plot=original_plot)
    percent_plot = line_plot(x_size, y_size, dt_array, prct_var_one,
                             prct_var_two, code, '% Difference of ',
                             link_plot=original_plot)
    corr_fig = gridplot([[original_plot], [corrected_plot], [delta_plot],
                         [percent_plot]], toolbar_location="left")
    return corr_fig
def __init__(self, df, params=[], logify=False, output='notebook',
             notebook_url="http://localhost:8888", **kwargs):
    self.df = df
    self.params = params
    self.logify = logify
    self.kwargs = kwargs
    if output == 'notebook':
        reset_output()
        output_notebook()
        show(self.modify_doc, notebook_url=notebook_url)
    else:
        reset_output()
        # Create the server once (the original built a second, shadowing
        # Server). run_until_shutdown() blocks until interrupted, so open
        # the browser tab and store the reference first.
        server = Server({'/': self.modify_doc})
        server.start()
        server.show("/")
        self.server = server
        try:
            server.run_until_shutdown()
        except Exception:
            pass
def plotPreds(prediction, test, outputDir, parametersSet):
    reset_output()
    stocks = test.columns.values
    dataTest = test.reset_index()
    output_file(outputDir + '_'.join(parametersSet) + '_predPerf.html')
    colors_list = ['green', 'red']
    grid = []
    subGrid = []
    for i, stock in enumerate(sorted(stocks)):
        if i % 3 == 0 and i != 0:
            grid.append(subGrid)
            subGrid = []
        legends_list = [stock, 'reconstruction']
        xs = [dataTest['Date'], dataTest['Date']]
        ys = [dataTest[stock], prediction[stock]]
        p = figure(x_axis_type="datetime", y_axis_label="Log-return")
        for (colr, leg, x, y) in zip(colors_list, legends_list, xs, ys):
            p.line(x, y, color=colr, legend=leg)
        subGrid.append(p)
    if subGrid:  # flush the last (possibly partial) row, which the loop never appends
        grid.append(subGrid)
    p = gridplot(grid)
    save(p)
    return True
def plot_flare_bokeh(time, flux_pca, flux=None, flux_type="", width_fig=900,
                     height_fig=500, title=""):
    # output form
    reset_output()
    output_notebook()
    TOOLTIPS = [
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
    ]
    # graph setting
    p = figure(tooltips=TOOLTIPS, title=title, x_axis_label="Time",
               y_axis_label="Flux", plot_width=width_fig, plot_height=height_fig)
    # main body for plotting
    # if flux is not None:
    #     p.circle(time, flux, legend="raw" + "(%s)" % flux_type, color="red")
    #     p.line(time, flux, legend="raw" + "(%s)" % flux_type, color="red")
    p.circle(time, flux_pca, color="black")
    p.line(time, flux_pca, color="black")
    # p.add_layout(p.legend[0], "right")  # move the legend outside the plot (right side)
    # Hide legend on click
    # p.legend.click_policy = "hide"
    # Output format (svg or png)
    p.output_backend = "svg"
    # Show
    show(p)
def save_to_html_for_account_code(new_df, filename, account_code):
    reset_output()
    output_file(filename)
    sample = new_df.loc[new_df['account_code'] == account_code]
    source = ColumnDataSource(sample)
    clients = source.data['client_name'].tolist()
    p = figure(x_range=clients)
    p.vbar(x='client_name', top='test_prediction', source=source, width=0.50,
           color='red')
    p.xaxis.major_label_orientation = "vertical"
    p.title.text = 'Bank Marketing Predictions'
    p.yaxis.axis_label = 'Prediction rate'
    hover = HoverTool()
    hover.tooltips = [('Client Name', '@client_name'),
                      ('Account Code', '@account_code'),
                      ('Age', '@age'),
                      ('Campaign', '@campaign'),
                      ('Pdays', '@pdays'),
                      ('Previous', '@previous'),
                      ('Marital status', '@marital_married'),
                      ('Target', '@test_prediction')]
    p.add_tools(hover)
    save(p)
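# Hypothetical call for save_to_html_for_account_code: `new_df` is assumed to
# be a pandas DataFrame of model output carrying every column the hover tool
# above references (names inferred from the function body, not from the source).
import pandas as pd

example_predictions = pd.DataFrame({
    'client_name': ['Smith', 'Jones'],
    'account_code': [101, 101],
    'test_prediction': [0.72, 0.35],
    'age': [41, 29],
    'campaign': [2, 1],
    'pdays': [999, 6],
    'previous': [0, 1],
    'marital_married': [1, 0],
})
save_to_html_for_account_code(example_predictions, 'predictions_101.html', 101)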
def plotResiduals(residuals, outputDir, parametersSet, who):
    reset_output()
    stocks = residuals.columns.values
    res = residuals.reset_index()
    output_file(outputDir + '_'.join(parametersSet) + '_residuals_' + who + '.html')
    grid = []
    subGrid = []
    for i, stock in enumerate(sorted(stocks)):
        if i % 3 == 0 and i != 0:
            grid.append(subGrid)
            subGrid = []
        p1 = figure(title=stock + ' ' + who + ' residuals',
                    background_fill_color="#E8DDCB", x_axis_label='r - r_hat')
        p1.yaxis.visible = False
        p1.legend.location = "top_left"
        hist, edges = np.histogram(res[stock], density=True, bins=25)
        p1.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
                fill_color="#036564", line_color="#033649")
        subGrid.append(p1)
    if subGrid:  # flush the last (possibly partial) row, which the loop never appends
        grid.append(subGrid)
    p = gridplot(grid)
    save(p)
    return True
def histogram_weekdays(filename, metrics):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                'Saturday', 'Sunday']
    fig = bkh.figure(x_range=weekdays, title='Message distribution over weekdays',
                     width=720, height=480)
    fig.vbar(x=dodge('index', 0.35, range=fig.x_range), top='frequency',
             width=0.3, source=metrics['A']['frame_weekdays'], color=colors[0],
             legend=metrics['A']['name'])
    fig.vbar(x=dodge('index', 0.65, range=fig.x_range), top='frequency',
             width=0.3, source=metrics['B']['frame_weekdays'], color=colors[1],
             legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Weekday'
    fig.yaxis.axis_label = 'Message count'
    bkh.show(fig)
    return
def histogram_hourofday(filename, metrics, key, title_str, ylabel):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    hours = ['{:02d}:00'.format(h) for h in range(24)]  # '00:00' through '23:00'
    fig = bkh.figure(x_range=hours, title=title_str, width=1280, height=480)
    fig.vbar(x=dodge('index', 0.35, range=fig.x_range), top='frequency',
             width=0.3, source=metrics['A'][key], color=colors[0],
             legend=metrics['A']['name'])
    fig.vbar(x=dodge('index', 0.65, range=fig.x_range), top='frequency',
             width=0.3, source=metrics['B'][key], color=colors[1],
             legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Time'
    fig.yaxis.axis_label = ylabel
    bkh.show(fig)
    return
def __init__(self, df, params=[], trim_factor=1, logify=False, output='notebook',
             port=5006, notebook_url="http://localhost:8888", **kwargs):
    self.df = df.iloc[::trim_factor].reset_index(drop=True)
    self.params = params
    self.logify = logify
    self.kwargs = kwargs
    if output == 'notebook':
        reset_output()
        output_notebook()
        show(self.modify_doc, notebook_url=notebook_url)
    elif output == 'server':
        reset_output()
        server = Server({'/': self.modify_doc}, port=port)
        server.start()
        try:
            server.run_until_shutdown()
        except Exception:
            print("Server already running")
        self.server = server
def bokeh_simple_barchart(absciss, ordinate, title, graph_dir, graph_name,
                          dump_jpg, show_html, width=1800, height=800):
    """Dump simple bokeh barchart with single categorical value."""
    # create an output graphics file
    output_file(os.path.join(graph_dir, graph_name + ".html"))
    p = figure(x_range=absciss, plot_height=height, plot_width=width,
               title=title, toolbar_location=None, tools="")
    p.vbar(x=absciss, top=ordinate, width=0.9)
    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    # show figure in browser
    if show_html:
        show(p)
    # dump figure as png file
    if dump_jpg:
        export_png(p, filename=os.path.join(graph_dir, graph_name + ".png"))
    reset_output()
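# Example call (assumes the bokeh imports used above are in module scope;
# dump_jpg=True would additionally need selenium plus a webdriver for
# bokeh's export_png).
bokeh_simple_barchart(['apple', 'pear', 'plum'], [10, 4, 7], 'Fruit counts',
                      '.', 'fruit_counts', dump_jpg=False, show_html=True)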
def showScatterPlot(self):
    """Scatter plot visualization."""
    reset_output()
    output_file('showme2.html')
    show(self.createScatterPlot())
def test_heatmap_recipe(self):
    ar_downsample._loadAR()
    reset_output()
    sess = Session(client=app.test_client())
    output_server('Census', session=sess)
    source = ServerDataSource(expr={'op': 'Field', 'args': [':leaf', 'bivariate']})
    plot = figure(plot_width=600, plot_height=400, title="Test Title")
    plot.square('A', 'B', source=source)
    plot2 = ar_downsample.heatmap(plot, palette="Reds9", reserve_val=0,
                                  points=True, client_color=True,
                                  title="Test Title 2")
    source2 = self._find_source(plot2)
    self.assertEquals("Test Title 2", plot2.title)
    self.assertEquals(type(source2), ServerDataSource)
    transform = source2.transform
    self.assertEquals(type(transform['info']), ar_downsample.Const)
    self.assertEquals(type(transform['agg']), ar_downsample.Count)
    self.assertEquals(type(transform['shader']), ar_downsample.Seq)
    self.assertEquals(transform['shader'].out, "image")
def histogram_month_chars(filename, metrics):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    data_months = {
        'index': metrics['A']['frame_months_chars'].index,
        metrics['A']['name']: metrics['A']['frame_months_chars'].frequency,
        metrics['B']['name']: metrics['B']['frame_months_chars'].frequency
    }
    fig = bkh.figure(x_axis_type='datetime',
                     title='Monthly character count over time per person',
                     width=720, height=480)
    fig.vbar(x='index', top='frequency', width=timedelta(days=10),
             source=metrics['A']['frame_months_chars'], color=colors[0],
             legend=metrics['A']['name'])
    fig.vbar(x='index', top='frequency', width=timedelta(days=10),
             source=metrics['B']['frame_months_chars'], color=colors[1],
             legend=metrics['B']['name'])
    fig.xaxis.axis_label = 'Date'
    fig.yaxis.axis_label = 'Number of characters'
    bkh.show(fig)
    return
def plot_file(file, path, plot_dir):
    dt = pd.read_csv(os.path.join(path, file))
    axis_datetime = np.array(dt['Time'], dtype=np.datetime64)
    col_list = list(i for i in dt.columns.values if i != 'Time')
    if not os.path.isdir(plot_dir):
        os.mkdir(plot_dir)
    full_path = os.path.join(plot_dir, file.split('.')[0] + '.html')
    output_file(full_path)
    plot_list = []
    for c in col_list:
        p = figure(title=c, x_axis_label='Time', y_axis_label=c,
                   x_axis_type="datetime", plot_width=1200)
        p.line(axis_datetime, dt[c])
        plot_list.append(p)
    p = column(plot_list)
    show(p)
    reset_output()
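# Sketch of driving plot_file over a directory of CSVs; 'csv_data' is a
# hypothetical input directory whose files each carry a 'Time' column
# parseable as datetime64 plus one or more numeric columns.
for csv_name in os.listdir('csv_data'):
    if csv_name.endswith('.csv'):
        plot_file(csv_name, 'csv_data', 'plots')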
def plot_heart_rate_variability(apple_watch):
    """
    Generate swarm-like plots of heart rate variability measures for multiple days

    :param apple_watch: data frame of heart rate variability data
    :return: None
    """
    logger.info('Loading and Plotting Heart Rate Variability Data')
    df = apple_watch.load_heart_rate_variability_data()
    df = df[(df['start_timestamp'] > START_DATE) & (df['start_timestamp'] < END_DATE)]
    df['date'] = list(map(lambda d: d.strftime('%m/%d/%y'), df['start_timestamp']))
    df['time'] = list(map(lambda d: d.strftime('%H:%M:%S'), df['start_timestamp']))
    dates = list(df['date'].unique())

    # remove instantaneous data, bokeh doesn't like dictionary format
    del df['instantaneous_bpm']

    source = ColumnDataSource(df)
    plot = figure(width=800, height=600, x_range=dates,
                  x_axis_label='Date',
                  y_axis_label='Time Between Heart Beats (ms)',
                  title='Apple Watch Heart Rate Variability (SDNN)',
                  tools='pan, wheel_zoom, box_zoom, reset, hover',
                  toolbar_location='above', sizing_mode='scale_both')

    # add color map for dates
    dates_cmap = factor_cmap('date', palette=Category20_20, factors=dates)
    plot.circle(x='date', y='heart_rate_variability', source=source, size=12,
                fill_color=dates_cmap)
    plot.xaxis.axis_label_text_font_size = "14pt"
    plot.xaxis.major_label_text_font_size = "12pt"
    plot.yaxis.axis_label_text_font_size = "14pt"
    plot.yaxis.major_label_text_font_size = "12pt"
    plot.title.text_font_size = '16pt'

    # configure hover tool
    plot.select_one(HoverTool).tooltips = [
        ('date', '@date'),
        ('time', '@time'),
        ('time interval', '@heart_rate_variability')
    ]

    if SHOW_PLOTS:
        show(plot, browser='chrome')
    save_plot(plot, 'heart_rate_variability')

    # clear output mode for next plot
    reset_output()

    # save dataframe
    df.to_csv('apple_watch_data/heart_rate_variability.csv', index=False)
def readTempData():
    attacks = ['190C', '200C', '210C', '220C', '230C']
    # Create a dictionary to store each figure in
    p = {}
    legendColors = ['navy', 'olive', 'firebrick', 'orange', 'purple']
    titles = ['Lowered to 190C', 'Lowered to 200C', 'Control - 210C',
              'Raised to 220C', 'Raised to 230C']
    i = 1
    # Iterate through all 5 temperature attacks (190C-230C)
    for x in attacks:
        # Format the Bokeh plots for the temperature graphs
        p[x] = figure(width=900, plot_height=600, title=titles[i - 1],
                      x_axis_label='Strain', y_axis_label='Stress (MPa)',
                      x_range=(0, 0.0225), y_range=(0, 34))
        output_file("Attack_5_" + x + ".html")
        p[x].title.text_font = 'Segoe UI'
        p[x].title.text_font_size = '26pt'
        p[x].title.align = 'center'
        p[x].xaxis.axis_label_text_font_size = '26pt'
        p[x].xaxis.major_label_text_font_size = '24pt'
        p[x].yaxis.axis_label_text_font_size = '26pt'
        p[x].yaxis.major_label_text_font_size = '24pt'
        p[x].min_border = 35
        # Load the mat file for each temperature
        mat = sio.loadmat('Attack_5_' + x + '.mat')
        # Iterate through all five specimens
        for specimen in range(5):
            # Assign values for stress and strain from the MAT file
            stress = mat['Temp_Test_Batch_' + str(i) + '_' + x]['stress'][0][0][:, specimen]
            strain = mat['Temp_Test_Batch_' + str(i) + '_' + x]['strain'][0][0][:, specimen]
            p[x].line(strain, stress, legend=None, line_width=1,
                      line_color=legendColors[specimen])
        i = i + 1
        # Write output files
        outputWrite = 'Plot_' + x + '.png'
        # export_png(p, filename=outputWrite)
        print("Finished Writing File: " + outputWrite)
        reset_output()
    # l = gridplot([[p['190C']], [p['200C']], [p['210C']], [p['220C']], [p['230C']]])  # Vertical
    l = gridplot([[p['190C'], p['200C']], [p['210C'], None],
                  [p['220C'], p['230C']]])  # Horizontal
    # export_png(l, filename='Gridplot.png')
    show(l)
def test_reset_output(self):
    plt._default_document = 10
    plt._default_session = 10
    plt._default_file = 10
    plt._default_notebook = 10
    plt.reset_output()
    self.assertTrue(isinstance(plt._default_document, plt.Document))
    self.assertEqual(plt._default_session, None)
    self.assertEqual(plt._default_file, None)
    self.assertEqual(plt._default_notebook, None)
def readFirstFourAttacks():
    attacks = ['SHD_XY', 'Solid_XY', 'Solid_XZ', 'Solid_XZ_with_Notch',
               'Solid_XZ_with_Seam']
    # Create a dictionary to store each figure
    f = {}
    legendColors = ['navy', 'olive', 'firebrick', 'orange', 'purple', 'red']
    titles = ['Density Adjustment', 'Orientation Change', 'Control Specimen',
              'Notch Insertion', 'Seam Placement']
    i = 1
    # Load the mat file
    mat = sio.loadmat('Tensile_Test_Data.mat')
    # Iterate through attacks
    for x in attacks:
        # Format the Bokeh plots
        f[x] = figure(width=900, plot_height=600,
                      title=titles[i - 1] + ' ASTM D638 Results',
                      x_axis_label='Strain', y_axis_label='Stress',
                      x_range=(0, 0.0825), y_range=(0, 34))
        output_file("Attack_" + str(i) + "_" + x + ".html")
        f[x].title.text_font = 'Segoe UI'
        f[x].title.text_font_size = '26pt'
        f[x].title.align = 'center'
        f[x].xaxis.axis_label_text_font_size = '26pt'
        f[x].xaxis.major_label_text_font_size = '24pt'
        f[x].yaxis.axis_label_text_font_size = '26pt'
        f[x].yaxis.major_label_text_font_size = '24pt'
        nums = np.array([[0, 1, 2, 3, 4, 5],
                         [None, 1, 2, 3, 4, 5],
                         [0, 1, 2, 3, 4, 5],
                         [0, 1, 2, 4, 5, None],
                         [0, 1, 2, 3, 4, 5]])
        # Iterate through all specimens
        for specimen in nums[i - 1, :]:
            if specimen is not None:
                # Assign values for stress and strain from the MAT file
                stress = mat[x]['stress'][0][0][:, specimen]
                strain = mat[x]['strain'][0][0][:, specimen]
                f[x].line(strain, stress, legend=None, line_width=1,
                          line_color=legendColors[specimen])
        i = i + 1
        # Write output files
        outputWrite = 'Plot_' + x + '.png'
        # export_png(p, filename=outputWrite)
        print("Finished Writing File: " + outputWrite)
        reset_output()
        # Debugging
        show(f[x])
def show_figures(self, figures=None, sizing_mode=None, toolbar_location='above',
                 ncols=None, plot_width=None, plot_height=None,
                 toolbar_options=None, merge_tools=True, notebook=True,
                 doc=None, notebook_url='http://localhost:8888', **kwargs):
    """
    Display the instance's Canvas (or a user-supplied grid of figures).

    Parameters
    ----------
    figures: List (default: None)
        An array of Canvases to display in a grid, given as a list of lists
        of bokeh.plotting.figure objects. If None, the instance's Canvas,
        along with its created widgets, will be selected.
    sizing_mode: str (default: None)
        How the component should size itself (allowed values: 'fixed',
        'stretch_width', 'stretch_height', 'stretch_both', 'scale_width',
        'scale_height', 'scale_both').
    toolbar_location: str (default: 'above')
        Where the Bokeh Toolbar will be located w.r.t. the Canvas
        (allowed values: 'above', 'below', 'left', 'right').
    ncols: int (default: None)
        Specify the number of columns you would like in your grid. You must
        only pass an un-nested list of plots (as opposed to a list of lists
        of plots) when using ncols.
    plot_width: int (default: None)
        The width you would like all your plots to be. If None the
        dimensions are automatically calculated.
    plot_height: int (default: None)
        The height you would like all your plots to be. If None the
        dimensions are automatically calculated.
    toolbar_options: Dict (default: None)
        A dictionary of options that will be used to construct the grid's
        toolbar (an instance of ToolbarBox). If None is supplied,
        ToolbarBox's defaults will be used.
    merge_tools: boolean (default: True)
        Combine tools from all child plots into a single toolbar.
    notebook: boolean (default: True)
        Output either at a Jupyter Notebook (True) or at a Browser via
        Python Script/Local Bokeh Server (False).
    doc: ``bokeh.io.curdoc`` instance (default: None)
        The basic foundation Bokeh uses to render the canvas (along with
        its widgets).
    notebook_url: str (default: 'http://localhost:8888')
        The IP address of the Jupyter Notebook.
    **kwargs: Dict
        Other parameters related to the Canvas' output (in case the output
        is a Jupyter Notebook).
    """
    grid = None
    try:
        if figures is None:
            if len(self.widgets) != 0:
                figures = [[column(*self.widgets)], [self.figure]]
            else:
                figures = [[self.figure]]
        grid = bokeh.layouts.gridplot(figures, sizing_mode=sizing_mode,
                                      toolbar_location=toolbar_location,
                                      ncols=ncols, plot_width=plot_width,
                                      plot_height=plot_height,
                                      toolbar_options=toolbar_options,
                                      merge_tools=merge_tools)
    except TypeError as e:
        print(f'{e}. You must either:\n'
              f'\t* Pass \'figures\' as a nested list of figures and leave ncols = None; or\n'
              f'\t* Pass \'figures\' as a list and a non-None value to \'ncols\'.')

    def bokeh_app(doc):
        doc.add_root(grid)

    if notebook:
        reset_output()
        output_notebook(**kwargs)
        show(bokeh_app, notebook_url=notebook_url)
    else:
        bokeh_app(bokeh_io.curdoc() if doc is None else doc)
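# The grid-building fallback above reduces to bokeh.layouts.gridplot; here is
# a minimal standalone equivalent with throwaway figures (assumed example
# data, not the class's own Canvas or widgets):
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show

_f1 = figure(width=300, height=300)
_f1.line([1, 2, 3], [4, 6, 5])
_f2 = figure(width=300, height=300)
_f2.circle([1, 2, 3], [6, 4, 5])
show(gridplot([[_f1], [_f2]], toolbar_location='above'))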
def histogram_days(filename, frame, name, color):
    bkh.reset_output()
    bkh.output_file(filename, title=filename)
    fig = bkh.figure(x_axis_type='datetime',
                     title='Message count per day of ' + name,
                     width=720, height=480)
    fig.line(frame.index, frame.frequency, color=color, line_width=3)
    fig.xaxis.axis_label = 'Date'
    fig.yaxis.axis_label = 'Frequency'
    bkh.show(fig)
    return
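# Minimal input for histogram_days: a DataFrame with a DatetimeIndex and a
# `frequency` column (an assumed shape, mirroring the other histogram helpers;
# the module-level bkh import is also assumed).
import pandas as pd

example_frame = pd.DataFrame({'frequency': [3, 7, 2, 9, 5]},
                             index=pd.date_range('2021-03-01', periods=5, freq='D'))
histogram_days('days.html', example_frame, 'Alice', '#1f77b4')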
def discrete_charts(data, cols, plot_color):
    from bokeh.charts import Bar, output_file, show
    from bokeh.io import output_notebook
    from bokeh.plotting import figure
    from bokeh.layouts import gridplot
    from bokeh.layouts import column, row
    from bokeh.plotting import reset_output
    from bokeh.charts.attributes import cat
    from collections import Counter
    from IPython.display import display

    col_dict = {}
    for col in cols:
        col_dict[col] = pd.DataFrame.from_dict(Counter(data[col]), orient='index')
        col_dict[col].columns = [col]

    row_coll = []
    rows = []
    block = True
    # loop variable renamed from `column` so it no longer shadows bokeh's column()
    for col_name in cols:
        block = True
        column_title = ""
        for column_title_piece in col_name.split('_'):
            column_title += " " + column_title_piece.title()
        p = Bar(col_dict[col_name], values=col_name, title=column_title + ' Stats',
                color=plot_color, plot_width=300, plot_height=200, ylabel="",
                legend=None, toolbar_location=None)
        rows.append(p)
        if len(rows) == 3:
            block = False
            row_coll.append(list(rows))
            reset_output()
            rows = []
    if block:
        row_coll.append(list(rows))
    # result renamed from `gridplot` so it no longer shadows the gridplot() function
    grid = gridplot(row_coll)
    output_notebook()
    show(grid)
def output_image_wall(imgs, output_path, title, ids, info, info_name):
    dim = find_sqrt_root_roof(len(imgs))
    img_dimension = imgs[0].shape[0]
    p = figure(title=title, width=dim * img_dimension, height=dim * img_dimension,
               tooltips=[('x,y', '@xs, @ys'), ('id', '@ids'), (info_name, '@info')])
    p.x_range.range_padding = p.y_range.range_padding = 0
    rgba_images = list()
    xs = list()
    ys = list()
    for i in range(len(imgs)):
        img = imgs[i]
        # append a fully opaque alpha channel (255 everywhere)
        rgba = np.dstack((img, 255 - np.zeros(img.shape[:-1], dtype=np.uint8)))
        rgba_images.append(rgba)
        xs.append(i % dim)
        ys.append(i // dim)
    data = dict(
        images=rgba_images,
        xs=xs,
        ys=ys,
        ids=ids,
        info=info,
    )
    p.image_rgba('images', source=data, x='xs', y='ys', dw=1, dh=1)
    additional_p = figure(title="Corresponding jpeg compress ratio",
                          width=dim * img_dimension, height=dim * img_dimension,
                          tooltips=[('x,y', '@xs, @ys'), ('id', '@ids'),
                                    (info_name, '@info')])
    additional_p.x_range.range_padding = additional_p.y_range.range_padding = 0
    additional_p.rect(source=data, x='xs', y='ys', width=1, height=1,
                      color='black', hover_line_color='black', line_color=None,
                      alpha='info')
    output_file(output_path, title=title)
    output_column = column([p, additional_p])
    show(output_column)
    reset_output()
def plot_score(trainScore, TestScore, path=None, filename='plot', title='plot'):
    # guard the default: path=None previously crashed on string concatenation
    the_dir = 'plot' if path is None else str(path)
    output_file(the_dir + '/' + str(filename) + '.html')
    x_L = list(range(len(trainScore)))
    p = figure(title=title, x_axis_label='time', y_axis_label='score',
               plot_width=1200)
    p.line(x_L, trainScore, color='#B3DE69', legend='train score')
    p.line(x_L, TestScore, color='#CAB2D6', legend='test score')
    p.legend.location = 'bottom_left'
    show(p)
    reset_output()
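# Example: compare two score traces; writes plots/scores.html (the 'plots'
# directory is assumed to exist, since plot_score does not create it, and the
# bokeh imports used above are assumed to be in module scope).
plot_score([0.9, 0.7, 0.55, 0.5], [1.0, 0.8, 0.7, 0.68],
           path='plots', filename='scores', title='Score per epoch')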
def plot(df, selected):
    reset_output(state=None)
    p = Figure(title="Past 100 days Of Whatever Stock You Searched",
               x_axis_label="Date", x_axis_type='datetime',
               y_axis_label="Price", plot_width=800, plot_height=680)
    p.title.text_font = "arial"
    p.title.text_font_style = "bold"
    p.title.text_font_size = "12pt"
    p.title.align = "center"
    p.xaxis.axis_label_text_font = "arial"
    p.xaxis.axis_label_text_font_size = "10pt"
    p.xaxis.axis_label_text_font_style = "bold"
    p.yaxis.axis_label_text_font = "arial"
    p.yaxis.axis_label_text_font_size = "10pt"
    p.yaxis.axis_label_text_font_style = "bold"
    p.min_border_left = 0
    df["index"] = pd.to_datetime(df["index"])
    if 'open' in selected:
        p.line(df["index"][0:101], df["1. open"][0:101], color="navy",
               alpha=0.75, legend_label="Open")
    if 'close' in selected:
        p.line(df["index"][0:101], df["4. close"][0:101], color="green",
               alpha=0.75, legend_label="Close")
    if 'high' in selected:
        p.line(df["index"][0:101], df["2. high"][0:101], color="red",
               alpha=0.75, legend_label="High")
    if 'low' in selected:
        p.line(df["index"][0:101], df["3. low"][0:101], color="purple",
               alpha=0.75, legend_label="Low")
    p.legend.location = "bottom_right"
    return p
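# Synthetic-data sketch for plot(): the frame mimics the Alpha Vantage-style
# column names referenced above ('index', '1. open' ... '4. close' are
# assumptions inferred from the function body, not confirmed by the source).
import numpy as np
import pandas as pd
from bokeh.io import show

_rng = np.random.default_rng(0)
_base = 100 + _rng.standard_normal(101).cumsum()
example_quotes = pd.DataFrame({
    'index': pd.date_range('2023-01-02', periods=101, freq='B').astype(str),
    '1. open': _base,
    '2. high': _base + 1,
    '3. low': _base - 1,
    '4. close': _base + 0.2,
})
show(plot(example_quotes, selected=['open', 'close']))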
def plot(self):
    reset_output()
    output_file("result.html")
    TOOLTIPS = [
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
    ]
    p1 = figure(title="Time - SWA", width=600, height=200,
                x_axis_label='Time', y_axis_label='SWA', tooltips=TOOLTIPS)
    p2 = figure(title="Time - Throttle", width=600, height=200,
                x_range=p1.x_range, x_axis_label='Time',
                y_axis_label='Throttle', tooltips=TOOLTIPS)
    p3 = figure(title="Time - Brake", width=600, height=200,
                x_range=p2.x_range, x_axis_label='Time', y_axis_label='Brake',
                tooltips=TOOLTIPS)
    p1.line(df.iloc[:, 0], df.iloc[:, 1], legend="SWA")
    p2.line(df.iloc[:, 0], df.iloc[:, 2], legend="Throttle")
    p3.line(df.iloc[:, 0], df.iloc[:, 3], legend="Brake")
    first = Panel(child=gridplot([[p1, p2], [p3, None]]), title='first')
    second = Panel(child=gridplot([[p1, p2], [p3, None]]), title='second')
    tabs = Tabs(tabs=[first, second])
    show(tabs)
def wrapper(*args, **kwargs):
    reset_output()
    docname = prefix + str(uuid.uuid4())
    session = Session(name=url, root_url=url)
    session.use_doc(docname)
    session.load_document(curdoc())
    session.publish()
    curdoc().autoadd = False
    curdoc().autostore = False
    obj = func(*args, **kwargs)
    tag = embed.autoload_server(obj, session, public=True)
    obj._tag = tag
    curdoc().add(obj)
    changed = session.store_document(curdoc())
    logger.debug("stored: %s", str(changed))
    return obj
def test_replot_remove(self):
    ar_downsample._loadAR()
    reset_output()
    sess = Session(client=app.test_client())
    output_server('Census', session=sess)
    source = ServerDataSource(expr={'op': 'Field', 'args': [':leaf', 'bivariate']})
    plot = figure()
    plot.square('A', 'B', source=source)
    ar_downsample.replot(plot, remove_original=False)
    self.assertTrue(plot in curdoc().context.children, "Not retained")
    ar_downsample.replot(plot, remove_original=True)
    self.assertTrue(plot not in curdoc().context.children, "Not removed")
    try:
        ar_downsample.replot(plot, remove_original=True)
    except Exception:
        self.fail("Error replotting plot not in curdoc")
def test_contour_recipe(self):
    ar_downsample._loadAR()
    reset_output()
    sess = Session(client=app.test_client())
    output_server('Census', session=sess)
    source = ServerDataSource(expr={'op': 'Field', 'args': [':leaf', 'bivariate']})
    plot = figure(plot_width=600, plot_height=400, title="Test Title")
    plot.square('A', 'B', source=source)
    plot2 = ar_downsample.contours(plot, title="Contour")
    source2 = self._find_source(plot2)
    self.assertEquals("Contour", plot2.title)
    self.assertEquals(type(source2), ServerDataSource)
    transform = source2.transform
    self.assertEquals(type(transform['info']), ar_downsample.Const)
    self.assertEquals(type(transform['agg']), ar_downsample.Count)
    self.assertEquals(type(transform['shader']), ar_downsample.Seq)
    self.assertEquals(transform['shader'].out, "multi_line")
def TopAgencyforEachzipCode(self, mapPoints, dat):
    """Create a choropleth map of NYC in which the shape color for each
    zipcode represents its top agency by number of complaints."""
    reset_output()
    plot = figure()
    polygons = {'lat_list': [], 'lng_list': [], 'color_list': []}
    record_index = 0
    zipCodes = []
    longitudes = []
    latitudes = []
    agencies_names = []
    complaint_count = []
    colors = {'NYPD': '#7f0000', 'DOT': '#fee8c8', 'DEP': '#fdd49e',
              'DPR': '#fdbb84', 'HPD': '#fc8d59', 'FDNY': '#ef6548',
              'DOHMH': '#d73000', 'TLC': '#b30000'}
    for r in dat.iterRecords():
        currentZip = r[0]
        # make sure the type of the data stays the same
        intzip = int(currentZip)
        if intzip in self.zipBoroughdata:
            zipCodes.append(intzip)
            # get shape for this zip
            shape = dat.shapeRecord(record_index).shape
            points = shape.points
            lngs = [p[0] for p in points]
            lats = [p[1] for p in points]
            # store lat/lng for current zip shape
            polygons['lng_list'].append(lngs)
            polygons['lat_list'].append(lats)
            longitudes.append(lngs)
            latitudes.append(lats)
            # calculate color, according to number of complaints
            if currentZip in mapPoints['zip_complaints']:
                sortedlist = sorted(mapPoints['zip_complaints'][currentZip].items(),
                                    key=operator.itemgetter(1), reverse=True)
                agency = sortedlist[0][0]
                complaints = sortedlist[0][1]
                if agency in colors:
                    agencies_names.append(agency)
                    complaint_count.append(complaints)
                    color = colors[agency]
                else:
                    agencies_names.append('NA')
                    complaint_count.append('NA')
                    color = 'white'
            else:
                color = 'white'
                agencies_names.append('NA')
                complaint_count.append('NA')
            polygons['color_list'].append(color)
        record_index += 1
    file1 = output_file('TopAgencyForEachZipcode.html', title="TopAgencyForZipCode ")
    TOOLS = "pan,wheel_zoom,box_zoom,reset,hover,previewsave"
    source = ColumnDataSource(
        data=dict(
            longitudes=longitudes,
            latitudes=latitudes,
            agencies_names=agencies_names,
            complaint_count=complaint_count,
            zipCodes=zipCodes
        )
    )
    # create the polygons
    patches(polygons['lng_list'], polygons['lat_list'],
            fill_color=polygons['color_list'], line_color='gray',
            tools=TOOLS, plot_width=1100, plot_height=700,
            title='Agency with top number of Complaints according to Zip Codes',
            source=source)
    hover = curplot().select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([("Zip Code", "@zipCodes"),
                                  ("Top Agency Name", "@agencies_names"),
                                  ("Complaints", "@complaint_count")])
    hold()
    x, y = -74.2, 40.7
    for agency in colors:
        rect([x + 0.01], [y], color=colors[agency], width=0.01, height=.02)
        text([x], [y], text=agency, angle=0, text_font_size="8pt",
             font_weight="bold", text_align="right", text_baseline="middle")
        y = y + .02
    show()
def calculate_proxy(snp, pop, request, r2_d="r2"):
    import csv, json, operator, os, sqlite3, subprocess, sys, time
    from multiprocessing.dummy import Pool

    start_time = time.time()

    # Set data directories
    data_dir = "/local/content/ldlink/data/"
    gene_dir = data_dir + "refGene/sorted_refGene.txt.gz"
    recomb_dir = data_dir + "recomb/genetic_map_autosomes_combined_b37.txt.gz"
    snp_dir = data_dir + "snp142/snp142_annot_2.db"
    pop_dir = data_dir + "1000G/Phase3/samples/"
    vcf_dir = data_dir + "1000G/Phase3/genotypes/ALL.chr"
    tmp_dir = "./tmp/"

    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    # Create JSON output
    out_json = open(tmp_dir + "proxy" + request + ".json", "w")
    output = {}

    # Find coordinates (GRCh37/hg19) for SNP RS number
    # Connect to snp142 database
    conn = sqlite3.connect(snp_dir)
    conn.text_factory = str
    cur = conn.cursor()

    def get_coords(rs):
        id = rs.strip("rs")
        t = (id,)
        cur.execute("SELECT * FROM tbl_" + id[-1] + " WHERE id=?", t)
        return cur.fetchone()

    # Find RS number in snp142 database
    snp_coord = get_coords(snp)

    # Close snp142 connection
    cur.close()
    conn.close()

    if snp_coord is None:
        output["error"] = snp + " is not in dbSNP build 142."
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        return ("", "")

    # Select desired ancestral populations
    pops = pop.split("+")
    pop_dirs = []
    for pop_i in pops:
        if pop_i in ["ALL", "AFR", "AMR", "EAS", "EUR", "SAS", "ACB", "ASW",
                     "BEB", "CDX", "CEU", "CHB", "CHS", "CLM", "ESN", "FIN",
                     "GBR", "GIH", "GWD", "IBS", "ITU", "JPT", "KHV", "LWK",
                     "MSL", "MXL", "PEL", "PJL", "PUR", "STU", "TSI", "YRI"]:
            pop_dirs.append(pop_dir + pop_i + ".txt")
        else:
            output["error"] = pop_i + " is not an ancestral population. Choose one of the following ancestral populations: AFR, AMR, EAS, EUR, or SAS; or one of the following sub-populations: ACB, ASW, BEB, CDX, CEU, CHB, CHS, CLM, ESN, FIN, GBR, GIH, GWD, IBS, ITU, JPT, KHV, LWK, MSL, MXL, PEL, PJL, PUR, STU, TSI, or YRI."
            json_output = json.dumps(output, sort_keys=True, indent=2)
            print >> out_json, json_output
            out_json.close()
            return ("", "")

    get_pops = "cat " + " ".join(pop_dirs) + " > " + tmp_dir + "pops_" + request + ".txt"
    subprocess.call(get_pops, shell=True)

    # Get population ids
    pop_list = open(tmp_dir + "pops_" + request + ".txt").readlines()
    ids = []
    for i in range(len(pop_list)):
        ids.append(pop_list[i].strip())
    pop_ids = list(set(ids))

    # Extract query SNP phased genotypes
    vcf_file = vcf_dir + snp_coord[1] + ".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"
    tabix_snp_h = "tabix -H {0} | grep CHROM".format(vcf_file)
    proc_h = subprocess.Popen(tabix_snp_h, shell=True, stdout=subprocess.PIPE)
    head = proc_h.stdout.readlines()[0].strip().split()
    tabix_snp = "tabix {0} {1}:{2}-{2} | grep -v -e END > {3}".format(
        vcf_file, snp_coord[1], snp_coord[2],
        tmp_dir + "snp_no_dups_" + request + ".vcf")
    subprocess.call(tabix_snp, shell=True)

    # Check SNP is in the 1000G population, has the correct RS number,
    # and is not monoallelic
    vcf = open(tmp_dir + "snp_no_dups_" + request + ".vcf").readlines()
    if len(vcf) == 0:
        output["error"] = snp + " is not in 1000G reference panel."
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt", shell=True)
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
        return ("", "")
    elif len(vcf) > 1:
        geno = []
        for i in range(len(vcf)):
            if vcf[i].strip().split()[2] == snp:
                geno = vcf[i].strip().split()
        if geno == []:
            output["error"] = snp + " is not in 1000G reference panel."
            json_output = json.dumps(output, sort_keys=True, indent=2)
            print >> out_json, json_output
            out_json.close()
            subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt", shell=True)
            subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
            return ("", "")
    else:
        geno = vcf[0].strip().split()

    if geno[2] != snp:
        output["warning"] = ("Genomic position for query variant (" + snp +
                             ") does not match RS number at 1000G position (" +
                             geno[2] + ")")
        snp = geno[2]

    if "," in geno[3] or "," in geno[4]:
        output["error"] = snp + " is not a biallelic variant."
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt", shell=True)
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
        return ("", "")

    index = []
    for i in range(9, len(head)):
        if head[i] in pop_ids:
            index.append(i)

    genotypes = {"0": 0, "1": 0}
    for i in index:
        sub_geno = geno[i].split("|")
        for j in sub_geno:
            if j in genotypes:
                genotypes[j] += 1
            else:
                genotypes[j] = 1

    if genotypes["0"] == 0 or genotypes["1"] == 0:
        output["error"] = snp + " is monoallelic in the " + pop + " population."
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt", shell=True)
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
        return ("", "")

    # Define window of interest around query SNP
    window = 500000
    coord1 = int(snp_coord[2]) - window
    if coord1 < 0:
        coord1 = 0
    coord2 = int(snp_coord[2]) + window

    print ""

    # Calculate proxy LD statistics in parallel
    threads = 4
    block = (2 * window) / 4
    commands = []
    for i in range(threads):
        if i == min(range(threads)) and i == max(range(threads)):
            command = "python LDproxy_sub.py " + snp + " " + snp_coord[1] + " " + str(coord1) + " " + str(coord2) + " " + request + " " + str(i)
        elif i == min(range(threads)):
            command = "python LDproxy_sub.py " + snp + " " + snp_coord[1] + " " + str(coord1) + " " + str(coord1 + block) + " " + request + " " + str(i)
        elif i == max(range(threads)):
            command = "python LDproxy_sub.py " + snp + " " + snp_coord[1] + " " + str(coord1 + (block * i) + 1) + " " + str(coord2) + " " + request + " " + str(i)
        else:
            command = "python LDproxy_sub.py " + snp + " " + snp_coord[1] + " " + str(coord1 + (block * i) + 1) + " " + str(coord1 + (block * (i + 1))) + " " + request + " " + str(i)
        commands.append(command)

    processes = [subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
                 for command in commands]

    # collect output in parallel
    def get_output(process):
        return process.communicate()[0].splitlines()

    pool = Pool(len(processes))
    out_raw = pool.map(get_output, processes)
    pool.close()
    pool.join()

    # Aggregate output
    out_prox = []
    for i in range(len(out_raw)):
        for j in range(len(out_raw[i])):
            col = out_raw[i][j].strip().split("\t")
            col[6] = int(col[6])
            col[7] = float(col[7])
            col[8] = float(col[8])
            col.append(abs(int(col[6])))
            out_prox.append(col)

    # Sort output
    if r2_d not in ["r2", "d"]:
        if "warning" in output:
            output["warning"] = output["warning"] + ". " + r2_d + " is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
        else:
            output["warning"] = r2_d + " is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
        r2_d = "r2"

    out_dist_sort = sorted(out_prox, key=operator.itemgetter(14))
    if r2_d == "r2":
        out_ld_sort = sorted(out_dist_sort, key=operator.itemgetter(8), reverse=True)
    else:
        out_ld_sort = sorted(out_dist_sort, key=operator.itemgetter(7), reverse=True)

    # Populate JSON and text output
    outfile = open(tmp_dir + "proxy" + request + ".txt", "w")
    header = ["RS_Number", "Coord", "Alleles", "MAF", "Distance", "Dprime",
              "R2", "Correlated_Alleles", "RegulomeDB", "Function"]
    print >> outfile, "\t".join(header)

    track = open(tmp_dir + "track" + request + ".txt", "w")
    print >> track, "browser position chr" + str(snp_coord[1]) + ":" + str(coord1) + "-" + str(coord2)
    print >> track, ""
    print >> track, "track name=\"" + snp + "\" description=\"Query Variant: " + snp + "\" color=108,108,255"

    query_snp = {}
    query_snp["RS"] = out_ld_sort[0][3]
    query_snp["Alleles"] = out_ld_sort[0][1]
    query_snp["Coord"] = out_ld_sort[0][2]
    query_snp["Dist"] = out_ld_sort[0][6]
    query_snp["Dprime"] = str(round(float(out_ld_sort[0][7]), 4))
    query_snp["R2"] = str(round(float(out_ld_sort[0][8]), 4))
    query_snp["Corr_Alleles"] = out_ld_sort[0][9]
    query_snp["RegulomeDB"] = out_ld_sort[0][10]
    query_snp["MAF"] = str(round(float(out_ld_sort[0][11]), 4))
    query_snp["Function"] = out_ld_sort[0][13]

    output["query_snp"] = query_snp

    temp = [query_snp["RS"], query_snp["Coord"], query_snp["Alleles"],
            query_snp["MAF"], str(query_snp["Dist"]), str(query_snp["Dprime"]),
            str(query_snp["R2"]), query_snp["Corr_Alleles"],
            query_snp["RegulomeDB"], query_snp["Function"]]
    print >> outfile, "\t".join(temp)

    chr, pos = query_snp["Coord"].split(':')
    temp2 = [chr, pos, pos, query_snp["RS"]]
    print >> track, "\t".join(temp2)
    print >> track, ""
    if r2_d == "r2":
        print >> track, "track name=\"0.8<R2<1.0\" description=\"Proxy Variants with 0.8<R2<1.0\" color=198,129,0"
    else:
        print >> track, "track name=\"0.8<D'<1.0\" description=\"Proxy Variants with 0.8<D'<1.0\" color=198,129,0"

    proxies = {}
    rows = []
    digits = len(str(len(out_ld_sort)))
    r2_d_prior = 1
    counter = 0
    cutoff = [0.8, 0.6, 0.4, 0.2, 0.0]

    for i in range(1, len(out_ld_sort)):
        if float(out_ld_sort[i][8]) > 0.01 and out_ld_sort[i][3] != snp:
            proxy_info = {}
            row = []
            proxy_info["RS"] = out_ld_sort[i][3]
            proxy_info["Alleles"] = out_ld_sort[i][4]
            proxy_info["Coord"] = out_ld_sort[i][5]
            proxy_info["Dist"] = out_ld_sort[i][6]
            proxy_info["Dprime"] = str(round(float(out_ld_sort[i][7]), 4))
            proxy_info["R2"] = str(round(float(out_ld_sort[i][8]), 4))
            proxy_info["Corr_Alleles"] = out_ld_sort[i][9]
            proxy_info["RegulomeDB"] = out_ld_sort[i][10]
            proxy_info["MAF"] = str(round(float(out_ld_sort[i][12]), 4))
            proxy_info["Function"] = out_ld_sort[i][13]
            proxies["proxy_" + (digits - len(str(i))) * "0" + str(i)] = proxy_info
            chr, pos = proxy_info["Coord"].split(':')

            # Adding a row for the Data Table
            row.append(proxy_info["RS"])
            row.append(chr)
            row.append(pos)
            row.append(proxy_info["Alleles"])
            row.append(str(round(float(proxy_info["MAF"]), 4)))
            row.append(proxy_info["Dist"])
            row.append(str(round(float(proxy_info["Dprime"]), 4)))
            row.append(str(round(float(proxy_info["R2"]), 4)))
            row.append(proxy_info["Corr_Alleles"])
            row.append(proxy_info["RegulomeDB"])
            row.append("HaploReg link")
            row.append(proxy_info["Function"])
            rows.append(row)

            temp = [proxy_info["RS"], proxy_info["Coord"], proxy_info["Alleles"],
                    proxy_info["MAF"], str(proxy_info["Dist"]),
                    str(proxy_info["Dprime"]), str(proxy_info["R2"]),
                    proxy_info["Corr_Alleles"], proxy_info["RegulomeDB"],
                    proxy_info["Function"]]
            print >> outfile, "\t".join(temp)

            temp2 = [chr, pos, pos, proxy_info["RS"]]
            print >> track, "\t".join(temp2)

            if r2_d == "r2" and cutoff[counter] < r2_d_prior and float(proxy_info["R2"]) <= cutoff[counter]:
                print >> track, ""
                print >> track, "track name=\"" + str(cutoff[counter + 1]) + "<R2<" + str(cutoff[counter]) + "\" description=\"Proxy Variants with " + str(cutoff[counter + 1]) + "<R2<" + str(cutoff[counter]) + "\" color=198,129,0"
                counter += 1
            elif r2_d == "d" and cutoff[counter] < r2_d_prior and float(proxy_info["Dprime"]) <= cutoff[counter]:
                print >> track, ""
                print >> track, "track name=\"" + str(cutoff[counter + 1]) + "<D'<" + str(cutoff[counter]) + "\" description=\"Proxy Variants with " + str(cutoff[counter + 1]) + "<D'<" + str(cutoff[counter]) + "\" color=198,129,0"
                counter += 1

            if r2_d == "r2":
                r2_d_prior = proxy_info["R2"]
            else:
                r2_d_prior = proxy_info["Dprime"]

    output["aaData"] = rows
    output["proxy_snps"] = proxies

    # Output JSON and text file
    json_output = json.dumps(output, sort_keys=True, indent=2)
    print >> out_json, json_output
    out_json.close()
    outfile.close()
    track.close()

    # Organize scatter plot data
    q_rs = []
    q_allele = []
    q_coord = []
    q_maf = []
    p_rs = []
    p_allele = []
    p_coord = []
    p_maf = []
    dist = []
    d_prime = []
    d_prime_round = []
    r2 = []
    r2_round = []
    corr_alleles = []
    regdb = []
    funct = []
    color = []
    size = []
    for i in range(len(out_ld_sort)):
        (q_rs_i, q_allele_i, q_coord_i, p_rs_i, p_allele_i, p_coord_i, dist_i,
         d_prime_i, r2_i, corr_alleles_i, regdb_i, q_maf_i, p_maf_i, funct_i,
         dist_abs) = out_ld_sort[i]
        if float(r2_i) > 0.01:
            q_rs.append(q_rs_i)
            q_allele.append(q_allele_i)
            q_coord.append(float(q_coord_i.split(":")[1]) / 1000000)
            q_maf.append(str(round(float(q_maf_i), 4)))
            if p_rs_i == ".":
                p_rs_i = p_coord_i
            p_rs.append(p_rs_i)
            p_allele.append(p_allele_i)
            p_coord.append(float(p_coord_i.split(":")[1]) / 1000000)
            p_maf.append(str(round(float(p_maf_i), 4)))
            dist.append(str(round(dist_i / 1000000.0, 4)))
            d_prime.append(float(d_prime_i))
            d_prime_round.append(str(round(float(d_prime_i), 4)))
            r2.append(float(r2_i))
            r2_round.append(str(round(float(r2_i), 4)))
            corr_alleles.append(corr_alleles_i)

            # Correct Missing Annotations
            if regdb_i == ".":
                regdb_i = ""
            regdb.append(regdb_i)
            if funct_i == ".":
                funct_i = ""
            if funct_i == "NA":
                funct_i = "none"
            funct.append(funct_i)

            # Set Color
            if i == 0:
                color_i = "blue"
            elif funct_i != "none" and funct_i != "":
                color_i = "red"
            else:
                color_i = "orange"
            color.append(color_i)

            # Set Size
            size_i = 9 + float(p_maf_i) * 14.0
            size.append(size_i)

    # Begin Bokeh Plotting
    from collections import OrderedDict
    from bokeh.embed import components, file_html
    from bokeh.models import HoverTool, LinearAxis, Range1d
    from bokeh.plotting import ColumnDataSource, curdoc, figure, output_file, reset_output, save
    from bokeh.resources import CDN

    reset_output()

    source = ColumnDataSource(
        data=dict(
            qrs=q_rs,
            q_alle=q_allele,
            q_maf=q_maf,
            prs=p_rs,
            p_alle=p_allele,
            p_maf=p_maf,
            dist=dist,
            r=r2_round,
            d=d_prime_round,
            alleles=corr_alleles,
            regdb=regdb,
            funct=funct,
        )
    )

    # Proxy Plot
    x = p_coord
    if r2_d == "r2":
        y = r2
    else:
        y = d_prime
    whitespace = 0.01
    xr = Range1d(start=coord1 / 1000000.0 - whitespace,
                 end=coord2 / 1000000.0 + whitespace)
    yr = Range1d(start=-0.03, end=1.03)
    sup_2 = u"\u00B2"
    proxy_plot = figure(
        title="Proxies for " + snp + " in " + pop,
        min_border_top=2, min_border_bottom=2,
        min_border_left=60, min_border_right=60,
        h_symmetry=False, v_symmetry=False,
        plot_width=900, plot_height=600,
        x_range=xr, y_range=yr,
        tools="hover,tap,pan,box_zoom,box_select,reset,previewsave",
        logo=None, toolbar_location="above")

    tabix_recomb = "tabix -fh {0} {1}:{2}-{3} > {4}".format(
        recomb_dir, snp_coord[1], coord1 - whitespace, coord2 + whitespace,
        tmp_dir + "recomb_" + request + ".txt")
    subprocess.call(tabix_recomb, shell=True)
    filename = tmp_dir + "recomb_" + request + ".txt"
    recomb_raw = open(filename).readlines()
    recomb_x = []
    recomb_y = []
    for i in range(len(recomb_raw)):
        chr, pos, rate = recomb_raw[i].strip().split()
        recomb_x.append(int(pos) / 1000000.0)
        recomb_y.append(float(rate) / 100.0)

    proxy_plot.line(recomb_x, recomb_y, size=12, color="black", alpha=0.5)
    proxy_plot.circle(x, y, size=size, source=source, color=color, alpha=0.5)

    hover = proxy_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Query SNP", "@qrs @q_alle"),
        ("Proxy SNP", "@prs @p_alle"),
        ("Distance (Mb)", "@dist"),
        ("MAF (Query,Proxy)", "@q_maf,@p_maf"),
        ("R" + sup_2, "@r"),
        ("D\'", "@d"),
        ("Correlated Alleles", "@alleles"),
        ("RegulomeDB", "@regdb"),
        ("Functional Class", "@funct"),
    ])

    proxy_plot.text(x, y, text=regdb, alpha=1, text_font_size="7pt",
                    text_baseline="middle", text_align="center", angle=0)

    if r2_d == "r2":
        proxy_plot.yaxis.axis_label = "R" + sup_2
    else:
        proxy_plot.yaxis.axis_label = "D\'"

    proxy_plot.extra_y_ranges = {"y2_axis": Range1d(start=-3, end=103)}
    proxy_plot.add_layout(LinearAxis(y_range_name="y2_axis",
                                     axis_label="Combined Recombination Rate (cM/Mb)"),
                          "right")

    # Rug Plot
    y2_ll = [-0.03] * len(x)
    y2_ul = [1.03] * len(x)
    yr_rug = Range1d(start=-0.03, end=1.03)
    rug = figure(
        x_range=xr, y_range=yr_rug, border_fill='white', y_axis_type=None,
        title="", min_border_top=2, min_border_bottom=2, min_border_left=60,
        min_border_right=60, h_symmetry=False, v_symmetry=False,
        plot_width=900, plot_height=50, tools="xpan,tap")
    rug.segment(x, y2_ll, x, y2_ul, source=source, color=color, alpha=0.5,
                line_width=1)
    rug.toolbar_location = None

    # Gene Plot
    tabix_gene = "tabix -fh {0} {1}:{2}-{3} > {4}".format(
        gene_dir, snp_coord[1], coord1, coord2,
        tmp_dir + "genes_" + request + ".txt")
    subprocess.call(tabix_gene, shell=True)
    filename = tmp_dir + "genes_" + request + ".txt"
    genes_raw = open(filename).readlines()

    genes_plot_start = []
    genes_plot_end = []
    genes_plot_y = []
    genes_plot_name = []
    exons_plot_x = []
    exons_plot_y = []
    exons_plot_w = []
    exons_plot_h = []
    exons_plot_name = []
    exons_plot_id = []
    exons_plot_exon = []
    lines = [0]
    gap = 80000
    tall = 0.75
    if genes_raw is not None:
        for i in range(len(genes_raw)):
            (bin, name_id, chrom, strand, txStart, txEnd, cdsStart, cdsEnd,
             exonCount, exonStarts, exonEnds, score, name2, cdsStartStat,
             cdsEndStat, exonFrames) = genes_raw[i].strip().split()
            name = name2
            id = name_id
            e_start = exonStarts.split(",")
            e_end = exonEnds.split(",")

            # Determine Y Coordinate
            i = 0
            y_coord = None
            while y_coord is None:
                if i > len(lines) - 1:
                    y_coord = i + 1
                    lines.append(int(txEnd))
                elif int(txStart) > (gap + lines[i]):
                    y_coord = i + 1
                    lines[i] = int(txEnd)
                else:
                    i += 1

            genes_plot_start.append(int(txStart) / 1000000.0)
            genes_plot_end.append(int(txEnd) / 1000000.0)
            genes_plot_y.append(y_coord)
            genes_plot_name.append(name + " ")

            for i in range(len(e_start) - 1):
                if strand == "+":
                    exon = i + 1
                else:
                    exon = len(e_start) - 1 - i
                width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                x_coord = int(e_start[i]) / 1000000.0 + (width / 2)
                exons_plot_x.append(x_coord)
                exons_plot_y.append(y_coord)
                exons_plot_w.append(width)
                exons_plot_h.append(tall)
                exons_plot_name.append(name)
                exons_plot_id.append(id)
                exons_plot_exon.append(exon)

    n_rows = len(lines)
    genes_plot_yn = [n_rows - x + 0.5 for x in genes_plot_y]
    exons_plot_yn = [n_rows - x + 0.5 for x in exons_plot_y]
    yr2 = Range1d(start=0, end=n_rows)

    source2 = ColumnDataSource(
        data=dict(
            exons_plot_name=exons_plot_name,
            exons_plot_id=exons_plot_id,
            exons_plot_exon=exons_plot_exon,
        )
    )

    if len(lines) < 3:
        plot_h_pix = 150
    else:
        plot_h_pix = 150 + (len(lines) - 2) * 50

    gene_plot = figure(
        x_range=xr, y_range=yr2, border_fill='white', title="",
        min_border_top=2, min_border_bottom=2, min_border_left=60,
        min_border_right=60, h_symmetry=False, v_symmetry=False,
        plot_width=900, plot_height=plot_h_pix,
        tools="hover,tap,xpan,box_zoom,reset,previewsave", logo=None)
    gene_plot.segment(genes_plot_start, genes_plot_yn, genes_plot_end,
                      genes_plot_yn, color="black", alpha=1, line_width=2)
    gene_plot.rect(exons_plot_x, exons_plot_yn, exons_plot_w, exons_plot_h,
                   source=source2, fill_color="grey", line_color="grey")
    gene_plot.xaxis.axis_label = "Chromosome " + snp_coord[1] + " Coordinate (Mb)(GRCh37)"
    gene_plot.yaxis.axis_label = "Genes"
    gene_plot.ygrid.grid_line_color = None
    gene_plot.yaxis.axis_line_color = None
    gene_plot.yaxis.minor_tick_line_color = None
    gene_plot.yaxis.major_tick_line_color = None
    gene_plot.yaxis.major_label_text_color = None

    hover = gene_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Gene", "@exons_plot_name"),
        ("ID", "@exons_plot_id"),
        ("Exon", "@exons_plot_exon"),
    ])

    gene_plot.text(genes_plot_start, genes_plot_yn, text=genes_plot_name,
                   alpha=1, text_font_size="7pt", text_font_style="bold",
                   text_baseline="middle", text_align="right", angle=0)
    gene_plot.toolbar_location = "below"

    # html = file_html(curdoc(), CDN, "Test Plot")
    # out_html = open("LDproxy.html", "w")
    # print >> out_html, html
    # out_html.close()

    out_script, out_div = components(curdoc(), CDN)
    reset_output()

    # Print run time statistics
    pop_list = open(tmp_dir + "pops_" + request + ".txt").readlines()
    print "\nNumber of Individuals: " + str(len(pop_list))
    print "SNPs in Region: " + str(len(out_prox))
    duration = time.time() - start_time
    print "Run time: " + str(duration) + " seconds\n"

    # Remove temporary files
    subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt", shell=True)
    subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
    subprocess.call("rm " + tmp_dir + "genes_" + request + ".txt", shell=True)
    subprocess.call("rm " + tmp_dir + "recomb_" + request + ".txt", shell=True)

    # Return plot output
    return (out_script, out_div)
def test_default_resources_minified(self):
    plt.output_file("foo.html")
    self.assertEqual(plt._default_file["resources"].minified, True)
    plt.reset_output()
def generateCrossFilePlotsForBucket(i, lowerBound, upperBound, navigatorDF,
                                    retFilename):
    global bucketDir
    global timeUnitString

    aggregateLegendDict = {}
    figuresForAllFiles = []
    fileName = bucketDir + "/bucket-" + str(i) + ".html"

    reset_output()

    intervalTitle = ("Interval #" + str(i) + ". {:,}".format(lowerBound) +
                     " to " + "{:,}".format(upperBound) +
                     " " + timeUnitString + ".")

    # Generate a navigator chart, which shows where we are in the
    # trace and allows moving around the trace.
    navigatorFigure = generateNavigatorFigure(navigatorDF, i, intervalTitle)
    figuresForAllFiles.append(navigatorFigure)

    # Select from the dataframe for this file the records whose 'start'
    # and 'end' timestamps fall within the lower and upper bound.
    for fname in sorted(perFileDataFrame.keys()):
        fileDF = perFileDataFrame[fname]

        # Select operations whose start timestamp falls within
        # the current interval, delimited by lowerBound and upperBound.
        startInBucket = fileDF.loc[(fileDF['start'] >= lowerBound)
                                   & (fileDF['start'] < upperBound)]

        # Select operations whose end timestamp falls within
        # the current interval, delimited by lowerBound and upperBound.
        endInBucket = fileDF.loc[(fileDF['end'] > lowerBound)
                                 & (fileDF['end'] <= upperBound)]

        # Select operations that begin before this interval and end after
        # this interval, but continue throughout this interval. The interval
        # is delimited by lowerBound and upperBound.
        spanBucket = fileDF.loc[(fileDF['start'] < lowerBound)
                                & (fileDF['end'] > upperBound)]

        frames = [startInBucket, endInBucket, spanBucket]
        bucketDF = pd.concat(frames).drop_duplicates().reset_index(drop=True)

        if bucketDF.size == 0:
            continue

        # If the end of the function is outside the interval, let's pretend
        # that it is within the interval, otherwise we won't see any data about
        # it when we hover. This won't have the effect of showing wrong
        # data to the user.
        mask = bucketDF.end >= upperBound
        bucketDF.loc[mask, 'end'] = upperBound - 1

        # Same adjustment as above if the start of the operation falls outside
        # the interval's lower bound.
        mask = bucketDF.start < lowerBound
        bucketDF.loc[mask, 'start'] = lowerBound

        largestStackDepth = bucketDF['stackdepthNext'].max()
        figureTitle = fname

        figure, legendDict = generateBucketChartForFile(
            figureTitle, bucketDF, largestStackDepth, lowerBound, upperBound)
        aggregateLegendDict.update(legendDict)
        figuresForAllFiles.append(figure)

    # Create the legend for this file and insert it after the navigator figure
    if len(aggregateLegendDict) > 0:
        legendFigure = createLegendFigure(aggregateLegendDict)
        figuresForAllFiles.insert(1, legendFigure)

    save(column(figuresForAllFiles), filename=fileName,
         title=intervalTitle, resources=CDN)

    retFilename.value = fileName
def main():
    global arrowLeftImg
    global arrowRightImg
    global bucketDir
    global perFuncDF
    global targetParallelism

    configSupplied = False
    figuresForAllFunctions = []

    # Set up the argument parser
    parser = argparse.ArgumentParser(description='Visualize operation log')
    parser.add_argument('files', type=str, nargs='*',
                        help='log files to process')
    parser.add_argument('-c', '--config', dest='configFile', default='')
    parser.add_argument('-d', '--dumpCleanData', dest='dumpCleanData',
                        default=False, action='store_true',
                        help='Dump clean log data. Clean data will '
                             'not include incomplete function call records, '
                             'e.g., if there is a function begin record, but '
                             'no function end record, or vice versa.')
    # default was the string '0'; use the integer so the comparison below is sound
    parser.add_argument('-j', dest='jobParallelism', type=int, default=0)
    args = parser.parse_args()

    if len(args.files) == 0:
        parser.print_help()
        sys.exit(1)

    # Determine the target job parallelism
    if args.jobParallelism > 0:
        targetParallelism = args.jobParallelism
    else:
        targetParallelism = multiprocessing.cpu_count() * 2

    # Get names of standard CSS colors that we will use for the legend
    initColorList()

    # Read the configuration file, if supplied.
    if args.configFile != '':
        configSupplied = parseConfigFile(args.configFile)

    if not configSupplied:
        pluralSuffix = ""
        print(color.BLUE + color.BOLD +
              "Will deem as outliers all function instances whose runtime " +
              "was higher than the " + str(PERCENTILE * 100) +
              "th percentile for that function." + color.END)

    # Create a directory for the files that display the data summarized
    # in each bucket of the outlier histogram. We call these "bucket files".
    if not os.path.exists(bucketDir):
        os.makedirs(bucketDir)

    # Parallelize this later, so we are working on files in parallel.
    for fname in args.files:
        processFile(fname, args.dumpCleanData)

    # Normalize all intervals by subtracting the first timestamp.
    normalizeIntervalData()

    # Generate plots of time series slices across all files for each bucket
    # in the outlier histogram. Save each cross-file slice to an HTML file.
    fileNameList = generateTSSlicesForBuckets()

    totalFuncs = len(perFuncDF.keys())
    i = 0

    # Generate a histogram of outlier durations
    for func in sorted(perFuncDF.keys()):
        funcDF = perFuncDF[func]
        figure = createOutlierHistogramForFunction(func, funcDF, fileNameList)
        if figure is not None:
            figuresForAllFunctions.append(figure)
        i += 1
        percentComplete = float(i) / float(totalFuncs) * 100
        print(color.BLUE + color.BOLD + " Generating outlier histograms... "),
        sys.stdout.write("%d%% complete \r" % (percentComplete))
        sys.stdout.flush()
    print(color.END)

    reset_output()
    output_file(filename="WT-outliers.html", title="Outlier histograms")
    show(column(figuresForAllFunctions))
def main(nc, save_dir, display=False):
    cf.create_dir(save_dir)
    with xr.open_dataset(nc, mask_and_scale=False) as ds:
        subsite = ds.subsite
        node = ds.node
        sensor = ds.sensor
        stream = ds.stream
        deployment = 'D0000{}'.format(str(np.unique(ds.deployment)[0]))
        t0 = ds.time_coverage_start
        t1 = ds.time_coverage_end
        sub_dir = os.path.join(save_dir, subsite,
                               '{}-{}-{}'.format(subsite, node, sensor),
                               stream, deployment)
        cf.create_dir(sub_dir)

        misc = ['quality', 'string', 'timestamp', 'deployment', 'id',
                'provenance', 'qc', 'time', 'mission', 'obs', 'volt',
                'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm']
        reg_ex = re.compile(r'\b(?:%s)\b' % '|'.join(misc))

        # Keep variables that do not match the regular expression
        vars = [s for s in ds.data_vars if not reg_ex.search(s)]

        x = ds['time'].data

        for v in vars:  # list of dataset variables
            # Filter out variables that are strings, datetimes, or QC related
            if (ds[v].dtype.kind == 'S'
                    or ds[v].dtype == np.dtype('datetime64[ns]')
                    or 'time' in v
                    or 'qc_results' in v
                    or 'qc_executed' in v):
                continue

            y = ds[v]
            try:
                y_units = y.units
            except AttributeError:
                y_units = None

            y_data = y.data
            if y_data.ndim > 1:
                continue

            source = ColumnDataSource(data=dict(x=x, y=y_data))
            gr = cf.get_global_ranges(subsite, node, sensor, v)

            output_file('{}/{}-{}-{}.html'.format(
                sub_dir, v,
                ds.time_coverage_start.replace(':', ''),
                ds.time_coverage_end.replace(':', '')))

            p = figure(width=1200, height=800,
                       title='{}-{}-{}: {} - {} - {}, Stream: {}'.format(
                           subsite, node, sensor, deployment, t0, t1, stream),
                       x_axis_label='Time (GMT)',
                       y_axis_label='{} ({})'.format(v, y_units),
                       x_axis_type='datetime', tools=[tools])
            p.line('x', 'y', legend=v, line_width=3, source=source)
            p.circle('x', 'y', fill_color='white', size=4, source=source)

            if gr:
                low_box = BoxAnnotation(top=gr[0], fill_alpha=0.05, fill_color='red')
                mid_box = BoxAnnotation(top=gr[1], bottom=gr[0], fill_alpha=0.1, fill_color='green')
                high_box = BoxAnnotation(bottom=gr[1], fill_alpha=0.05, fill_color='red')
                p.add_layout(low_box)
                p.add_layout(mid_box)
                p.add_layout(high_box)

            if display:
                show(p)
            else:
                save(p)
            reset_output()
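# Standalone sketch of the global-range shading used above: a green band for
# values inside [low, high] and red bands outside it, built with BoxAnnotation.
# The data and thresholds are made up for illustration.
from bokeh.models import BoxAnnotation
from bokeh.plotting import figure, output_file, reset_output, save

reset_output()
output_file("range_bands.html")
low, high = 2.0, 8.0
p = figure(width=600, height=300)
p.line([0, 1, 2, 3, 4], [1.0, 4.0, 9.5, 6.0, 2.5])
p.add_layout(BoxAnnotation(top=low, fill_alpha=0.05, fill_color='red'))
p.add_layout(BoxAnnotation(bottom=low, top=high, fill_alpha=0.1, fill_color='green'))
p.add_layout(BoxAnnotation(bottom=high, fill_alpha=0.05, fill_color='red'))
save(p)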
def create_choropleth(output_path, json_file, shade_data_file, palette_colour,
                      output_type, step, min_range, max_range, reverse, dynamic=True):
    reset_output()

    if isinstance(shade_data_file, str):
        results_data = pd.read_csv(shade_data_file)
    else:
        results_data = shade_data_file

    # Calculate the range and number of shades to show in the final output
    # if not user specified
    if dynamic:
        min_range = h.rounddown_nearest_ten(np.nanmin(list(results_data.result * 100)))
        max_range = h.roundup_nearest_ten(np.nanmax(list(results_data.result * 100)))
        step = set_dynamic_step(min_range, max_range)

    # Check for a whole number in user defined values - return an error if not
    shade_no = int(((max_range + step) - min_range) / step)

    plot_dict = {}  # stores each plot's data - one entry for each shade to display
    lower_limit = 0

    for upper_limit in range(min_range, max_range + step, step):
        temp_df = results_data[(results_data['result'] > lower_limit / 100) &
                               (results_data['result'] <= upper_limit / 100)]
        if len(temp_df.index) > 0:
            plot_dict[str(upper_limit)] = dict(zip(temp_df.district, temp_df.result))
        lower_limit = upper_limit

    # Split the geojson file to match the plots above
    geojson_dict = {}  # stores each plot's geo data
    delete_list = []   # districts to delete once all with a colour are assigned

    with open(json_file) as base_map:
        map_data = json.load(base_map)

    id_key = 'LAD11CD'  # 'LSOA11CD', 'LAD11CD'

    for key, value in plot_dict.items():
        geojson_list = []
        for feature in map_data['features']:
            if str(feature['properties'][id_key]) in value:
                geojson_list.append(feature)
                # ...but also remove the feature from map_data['features']
                delete_list.append(str(feature['properties'][id_key]))
        geojson_dict[key] = geojson_list

    map_data['features'] = [feature for feature in map_data['features']
                            if feature['properties'][id_key] not in delete_list]

    # If any features have no defined output, add them but assign a zero value,
    # with a corresponding plot for the zero shade.
    if bool(map_data['features']):
        plot_dict['0'] = dict((feature['properties'][id_key], 0)
                              for feature in map_data['features'])
        geojson_dict['0'] = [feature for feature in map_data['features']]

    # Create the colour palette to use
    colours = select_palette(shade_no, palette_colour, reverse)

    source_dict = {}  # stores all the ColumnDataSources

    for key, value in geojson_dict.items():
        define_features(value, plot_dict[key], key, source_dict,
                        min_range, max_range, step, colours, dynamic)

    tools = "pan,wheel_zoom,box_zoom,reset,hover,save"
    title = output_type + " by LA"
    p = figure(width=900, height=900, title=title, tools=tools)

    for key in sorted(source_dict.keys(), key=int, reverse=True):
        p.patches('x', 'y', source=source_dict[key], fill_color='color',
                  fill_alpha=0.7, line_color="white", line_width=0.15,
                  legend=str(key))

    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Name", "@name"),
        (output_type, "@rate%"),
        ("Code", "@code"),
    ]

    output_dir = os.path.join(output_path, "charts")
    if os.path.isdir(output_dir) is False:
        os.mkdir(output_dir)

    output_filename = output_type + '.html'
    output_file_path = os.path.join(output_dir, output_filename)
    output_file(output_file_path, title=title, mode='inline')
    save(p)
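# Minimal sketch of the shade-bucketing step in create_choropleth above:
# results (fractions) are binned into (lower, upper] percentage bands, one
# dict per band keyed by the band's upper limit. District names and values
# are invented for the demo.
def bucket_results(districts, results, min_range, max_range, step):
    plot_dict = {}
    lower = 0
    for upper in range(min_range, max_range + step, step):
        band = {d: r for d, r in zip(districts, results)
                if lower / 100.0 < r <= upper / 100.0}
        if band:
            plot_dict[str(upper)] = band
        lower = upper
    return plot_dict

print(bucket_results(['A', 'B', 'C'], [0.12, 0.47, 0.81], 0, 100, 20))
# -> {'20': {'A': 0.12}, '60': {'B': 0.47}, '100': {'C': 0.81}}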
def generate_interactive_bokeh_plot(self, subset, name, savelocation,
                                    annots=None, plus=None, portability="web"):
    """
    Generates interactive bokeh plots along with (optional) annotation
    and enrichment reports.
    """
    # PLOT CONFIG (NOTE: maybe expose (init) some of the configs later?
    # e.g. axis labels, sizes, etc.)
    # --------------------------------------------------------------------
    TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select,resize"
    plot = figure(tools=TOOLS, x_axis_label="Time points (h)",
                  y_axis_label="normalized expression counts")
    plot.plot_width = 800
    plot.plot_height = 800
    plot.title = name
    plot.title_text_font_size = "18pt"
    plot.title_text_color = "olive"
    plot.title_text_font = "times"
    plot.title_text_font_style = "italic"

    # AXIS (hardcoded)
    plot.xaxis[0].ticker = FixedTicker(ticks=self.ticks)
    plot.xaxis.bounds = (0, self.timepoints[-1])
    plot.xaxis.major_label_orientation = pi / float(2.5)
    # --------------------------------------------------------------------

    colour_list = toolbox.get_spaced_colors(len(subset))
    c = 0
    for gene in subset.keys():
        if annots is not None:
            labelextra = [annots[gene][0].strip()
                          for x_, y_ in zip(self.timepoints, subset[gene])]
        else:
            labelextra = ["" for x_, y_ in zip(self.timepoints, subset[gene])]

        cds = ColumnDataSource(data=dict(
            x=self.timepoints,
            y=subset[gene],
            label=[gene for x_, y_ in zip(self.timepoints, subset[gene])],
            labelextra=labelextra,
        ))

        # PLOTARAMA
        plot.line("x", "y", source=cds, color=colour_list[c])
        circle = Circle(x='x', y='y', line_color=None, fill_color=colour_list[c])
        c += 1
        circle_renderer = plot.add_glyph(cds, circle)

        # HOVER control
        # ----------------------------------------------------------------
        if annots is not None and annots[gene][0].strip() != "":
            tooltips = """
            <div style="width:350px">
            <b> @label </b><br/>
            <i><u>annotations:</u></i><br/>
            @labelextra
            </div>
            """
        else:
            tooltips = """
            <div style="width:350px">
            <b> @label </b><br/>
            </div>
            """
        plot.add_tools(HoverTool(tooltips=tooltips, renderers=[circle_renderer]))

    # ====================================================== #
    #                       TEMPLATING                       #
    # ====================================================== #
    script, div = components(plot)

    # Process "name" here to get "title"
    title = name

    # If an annotation or enrichment dictionary is not supplied with the
    # method's parameters, feed a blank one to the html template renderer.
    if annots is None:
        annots = {}
    if plus is None:
        plus = {}

    # Create the main html page scaffold
    scaffold = templater.Templater(script, div, title, annots)
    # ====================================================

    if portability == "batch":
        path = os.path.join(savelocation, "static/")
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise
        cssfile = urllib.URLopener()
        cssfile.retrieve("https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css",
                         os.path.join(path, "bokeh-0.11.1.min.css"))
        jsfile = urllib.URLopener()
        jsfile.retrieve("https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.js",
                        os.path.join(path, "bokeh-0.11.1.min.js"))

    # If a dictionary of enrichment dataframes is available, pass it along...
    if plus is not None:
        html = scaffold.render_main_page(portability, plus)
    else:
        html = scaffold.render_main_page(portability)

    # WRITE IT OUT
    # --------------------------------------------------------------------
    filename = os.path.join(savelocation, name + ".html")
    # Better to save it with the latin-1 charset because wiggly
    # characters tend to sneak through annotations and they can be a pain...
    with codecs.open(filename, encoding='latin-1', mode="w") as f:
        f.write(html)

    reset_output()  # resets plot state and avoids file ballooning when iterating
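# A hedged, standalone sketch of the embed-and-write pattern used by
# generate_interactive_bokeh_plot above: render a figure to <script>/<div>
# fragments with bokeh.embed.components, drop them into a page template, write
# the file, then reset_output() so repeated calls do not accumulate state.
# The PAGE string stands in for the project's Templater class, which is not
# shown here.
import codecs
from bokeh.embed import components
from bokeh.plotting import figure, reset_output

PAGE = u"""<html><head>{script}</head><body><h1>{title}</h1>{div}</body></html>"""

def write_plot_page(filename, title):
    plot = figure(width=400, height=300)
    plot.line([1, 2, 3], [4, 6, 5])
    script, div = components(plot)
    html = PAGE.format(script=script, div=div, title=title)
    with codecs.open(filename, encoding='latin-1', mode='w') as f:
        f.write(html)
    reset_output()  # clear implicit state between iterations

write_plot_page("demo.html", "demo")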
def main():
    global arrowLeftImg
    global arrowRightImg
    global bucketDir
    global perFuncDF

    configSupplied = False
    figuresForAllFunctions = []

    # Set up the argument parser
    #
    parser = argparse.ArgumentParser(description='Visualize operation log')
    parser.add_argument('files', type=str, nargs='*',
                        help='log files to process')
    parser.add_argument('-c', '--config', dest='configFile', default='')

    args = parser.parse_args()

    if (len(args.files) == 0):
        parser.print_help()
        sys.exit(1)

    # Get names of standard CSS colors that we will use for the legend
    initColorList()

    # Read the configuration file, if supplied.
    if (args.configFile != ''):
        configSupplied = parseConfigFile(args.configFile)

    if (not configSupplied):
        pluralSuffix = ""
        if (STDEV_MULT > 1):
            pluralSuffix = "s"
        print(color.BLUE + color.BOLD +
              "Will deem as outliers all function instances whose runtime " +
              "was " + str(STDEV_MULT) + " standard deviation" + pluralSuffix +
              " greater than the average runtime for that function." + color.END)

    # Create a directory for the files that display the data summarized
    # in each bucket of the outlier histogram. We call these "bucket files".
    #
    if not os.path.exists(bucketDir):
        os.makedirs(bucketDir)

    # Parallelize this later, so we are working on files in parallel.
    for fname in args.files:
        processFile(fname)

    # Normalize all intervals by subtracting the first timestamp.
    normalizeIntervalData()

    # Generate plots of time series slices across all files for each bucket
    # in the outlier histogram. Save each cross-file slice to an HTML file.
    #
    fileNameList = generateTSSlicesForBuckets()

    totalFuncs = len(perFuncDF.keys())
    i = 0

    # Generate a histogram of outlier durations
    for func in sorted(perFuncDF.keys()):
        funcDF = perFuncDF[func]
        figure = createOutlierHistogramForFunction(func, funcDF, fileNameList)
        if (figure is not None):
            figuresForAllFunctions.append(figure)

        i += 1
        percentComplete = float(i) / float(totalFuncs) * 100
        print(color.BLUE + color.BOLD + " Generating outlier histograms... "),
        sys.stdout.write("%d%% complete \r" % (percentComplete))
        sys.stdout.flush()

    print(color.END)

    reset_output()
    output_file(filename="WT-outliers.html", title="Outlier histograms")
    show(column(figuresForAllFunctions))
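# Standalone sketch of the carriage-return progress indicator used in both
# main() variants above: write the percentage followed by '\r' and flush, so
# the line updates in place instead of scrolling. The loop body here is a
# stand-in for the real per-function work.
import sys
import time

total = 20
for i in range(1, total + 1):
    percent = float(i) / total * 100
    sys.stdout.write("Generating outlier histograms... %d%% complete \r" % percent)
    sys.stdout.flush()
    time.sleep(0.05)  # placeholder for real work
sys.stdout.write("\n")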
def calculate_matrix(snplst, pop, request, r2_d="r2"):
    import json, math, operator, os, sqlite3, subprocess, sys

    # Set data directories
    data_dir = "/local/content/ldlink/data/"
    gene_dir = data_dir + "refGene/sorted_refGene.txt.gz"
    snp_dir = data_dir + "snp142/snp142_annot_2.db"
    pop_dir = data_dir + "1000G/Phase3/samples/"
    vcf_dir = data_dir + "1000G/Phase3/genotypes/ALL.chr"
    tmp_dir = "./tmp/"

    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    # Create JSON output
    out_json = open(tmp_dir + "matrix" + request + ".json", "w")
    output = {}

    # Open SNP list file
    snps_raw = open(snplst).readlines()
    if len(snps_raw) > 300:
        output["error"] = "Maximum variant list is 300 RS numbers. Your list contains " + str(len(snps_raw)) + " entries."
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        return("", "")
        raise

    # Remove duplicate RS numbers
    snps = []
    for snp_raw in snps_raw:
        snp = snp_raw.strip().split()
        if snp not in snps:
            snps.append(snp)

    # Select desired ancestral populations
    pops = pop.split("+")
    pop_dirs = []
    for pop_i in pops:
        if pop_i in ["ALL", "AFR", "AMR", "EAS", "EUR", "SAS", "ACB", "ASW", "BEB", "CDX", "CEU", "CHB", "CHS", "CLM", "ESN", "FIN", "GBR", "GIH", "GWD", "IBS", "ITU", "JPT", "KHV", "LWK", "MSL", "MXL", "PEL", "PJL", "PUR", "STU", "TSI", "YRI"]:
            pop_dirs.append(pop_dir + pop_i + ".txt")
        else:
            output["error"] = pop_i + " is not an ancestral population. Choose one of the following ancestral populations: AFR, AMR, EAS, EUR, or SAS; or one of the following sub-populations: ACB, ASW, BEB, CDX, CEU, CHB, CHS, CLM, ESN, FIN, GBR, GIH, GWD, IBS, ITU, JPT, KHV, LWK, MSL, MXL, PEL, PJL, PUR, STU, TSI, or YRI."
            json_output = json.dumps(output, sort_keys=True, indent=2)
            print >> out_json, json_output
            out_json.close()
            return("", "")
            raise

    get_pops = "cat " + " ".join(pop_dirs)
    proc = subprocess.Popen(get_pops, shell=True, stdout=subprocess.PIPE)
    pop_list = proc.stdout.readlines()

    ids = [i.strip() for i in pop_list]
    pop_ids = list(set(ids))

    # Connect to snp142 database
    conn = sqlite3.connect(snp_dir)
    conn.text_factory = str
    cur = conn.cursor()

    def get_coords(rs):
        id = rs.strip("rs")
        t = (id,)
        cur.execute("SELECT * FROM tbl_" + id[-1] + " WHERE id=?", t)
        return cur.fetchone()

    # Find RS numbers in snp142 database
    rs_nums = []
    snp_pos = []
    snp_coords = []
    warn = []
    tabix_coords = ""
    for snp_i in snps:
        if len(snp_i) > 0:
            if len(snp_i[0]) > 2:
                if snp_i[0][0:2] == "rs" and snp_i[0][-1].isdigit():
                    snp_coord = get_coords(snp_i[0])
                    if snp_coord != None:
                        rs_nums.append(snp_i[0])
                        snp_pos.append(snp_coord[2])
                        temp = [snp_i[0], snp_coord[1], snp_coord[2]]
                        snp_coords.append(temp)
                    else:
                        warn.append(snp_i[0])
                else:
                    warn.append(snp_i[0])
            else:
                warn.append(snp_i[0])

    # Close snp142 connection
    cur.close()
    conn.close()

    # Check RS numbers were found
    if warn != []:
        output["warning"] = "The following RS numbers were not found in dbSNP 142: " + ",".join(warn)
    if len(rs_nums) == 0:
        output["error"] = "Input variant list does not contain any valid RS numbers that are in dbSNP 142."
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        return("", "")
        raise

    # Check SNPs are all on the same chromosome
    for i in range(len(snp_coords)):
        if snp_coords[0][1] != snp_coords[i][1]:
            output["error"] = "Not all input variants are on the same chromosome: " + snp_coords[i-1][0] + "=chr" + str(snp_coords[i-1][1]) + ":" + str(snp_coords[i-1][2]) + ", " + snp_coords[i][0] + "=chr" + str(snp_coords[i][1]) + ":" + str(snp_coords[i][2]) + "."
            json_output = json.dumps(output, sort_keys=True, indent=2)
            print >> out_json, json_output
            out_json.close()
            return("", "")
            raise

    # Check max distance between SNPs
    distance_bp = []
    for i in range(len(snp_coords)):
        distance_bp.append(int(snp_coords[i][2]))
    distance_max = max(distance_bp) - min(distance_bp)
    if distance_max > 1000000:
        if "warning" in output:
            output["warning"] = output["warning"] + ". Switch rate errors become more common as distance between query variants increases (Query range = " + str(distance_max) + " bp)"
        else:
            output["warning"] = "Switch rate errors become more common as distance between query variants increases (Query range = " + str(distance_max) + " bp)"

    # Sort coordinates and make tabix formatted coordinates
    snp_pos_int = [int(i) for i in snp_pos]
    snp_pos_int.sort()
    snp_coord_str = [snp_coords[0][1] + ":" + str(i) + "-" + str(i) for i in snp_pos_int]
    tabix_coords = " " + " ".join(snp_coord_str)

    # Extract 1000 Genomes phased genotypes
    vcf_file = vcf_dir + snp_coords[0][1] + ".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"
    tabix_snps = "tabix -h {0}{1} | grep -v -e END".format(vcf_file, tabix_coords)
    proc = subprocess.Popen(tabix_snps, shell=True, stdout=subprocess.PIPE)

    # Define function to correct indel alleles
    def set_alleles(a1, a2):
        if len(a1) == 1 and len(a2) == 1:
            a1_n = a1
            a2_n = a2
        elif len(a1) == 1 and len(a2) > 1:
            a1_n = "-"
            a2_n = a2[1:]
        elif len(a1) > 1 and len(a2) == 1:
            a1_n = a1[1:]
            a2_n = "-"
        elif len(a1) > 1 and len(a2) > 1:
            a1_n = a1[1:]
            a2_n = a2[1:]
        return(a1_n, a2_n)

    # Import SNP VCF files
    vcf = proc.stdout.readlines()

    # Make sure there are genotype data in VCF file
    if vcf[-1][0:6] == "#CHROM":
        output["error"] = "No query variants were found in 1000G VCF file"
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        return("", "")
        raise

    h = 0
    while vcf[h][0:2] == "##":
        h += 1
    head = vcf[h].strip().split()

    # Extract haplotypes
    index = []
    for i in range(9, len(head)):
        if head[i] in pop_ids:
            index.append(i)

    hap1 = [[]]
    for i in range(len(index) - 1):
        hap1.append([])
    hap2 = [[]]
    for i in range(len(index) - 1):
        hap2.append([])

    rsnum_lst = []
    allele_lst = []
    pos_lst = []

    for g in range(h + 1, len(vcf)):
        geno = vcf[g].strip().split()
        if "," not in geno[3] and "," not in geno[4]:
            a1, a2 = set_alleles(geno[3], geno[4])
            for i in range(len(index)):
                if geno[index[i]] == "0|0":
                    hap1[i].append(a1)
                    hap2[i].append(a1)
                elif geno[index[i]] == "0|1":
                    hap1[i].append(a1)
                    hap2[i].append(a2)
                elif geno[index[i]] == "1|0":
                    hap1[i].append(a2)
                    hap2[i].append(a1)
                elif geno[index[i]] == "1|1":
                    hap1[i].append(a2)
                    hap2[i].append(a2)
                elif geno[index[i]] == "0":
                    hap1[i].append(a1)
                    hap2[i].append(".")
                elif geno[index[i]] == "1":
                    hap1[i].append(a2)
                    hap2[i].append(".")
                else:
                    hap1[i].append(".")
                    hap2[i].append(".")

            if geno[1] in snp_pos:
                rs_query = rs_nums[snp_pos.index(geno[1])]
                rs_1000g = geno[2]
                if rs_query == rs_1000g:
                    rsnum = rs_1000g
                else:
                    rsnum = rs_1000g
                    if "warning" in output:
                        output["warning"] = output["warning"] + ". Genomic position for query variant (" + rs_query + ") does not match RS number at 1000G position (" + rs_1000g + ")"
                    else:
                        output["warning"] = "Genomic position for query variant (" + rs_query + ") does not match RS number at 1000G position (" + rs_1000g + ")"
            else:
                rsnum = geno[2]
                if "warning" in output:
                    output["warning"] = output["warning"] + ". Genomic position (" + geno[1] + ") in VCF file does not match db142 search coordinates for query variant"
                else:
                    output["warning"] = "Genomic position (" + geno[1] + ") in VCF file does not match db142 search coordinates for query variant"

            rsnum_lst.append(rsnum)
            position = "chr" + geno[0] + ":" + geno[1] + "-" + geno[1]
            pos_lst.append(position)
            alleles = a1 + "/" + a2
            allele_lst.append(alleles)

    # Calculate pairwise LD statistics
    all_haps = hap1 + hap2
    ld_matrix = [[[None for v in range(2)] for i in range(len(all_haps[0]))] for j in range(len(all_haps[0]))]

    for i in range(len(all_haps[0])):
        for j in range(i, len(all_haps[0])):
            hap = {}
            for k in range(len(all_haps)):
                # Extract haplotypes
                hap_k = all_haps[k][i] + all_haps[k][j]
                if hap_k in hap:
                    hap[hap_k] += 1
                else:
                    hap[hap_k] = 1

            # Remove missing haplotypes
            keys = hap.keys()
            for key in keys:
                if "." in key:
                    hap.pop(key, None)

            # Check all haplotypes are present
            if len(hap) != 4:
                snp_i_a = allele_lst[i].split("/")
                snp_j_a = allele_lst[j].split("/")
                haps = [snp_i_a[0] + snp_j_a[0], snp_i_a[0] + snp_j_a[1],
                        snp_i_a[1] + snp_j_a[0], snp_i_a[1] + snp_j_a[1]]
                for h in haps:
                    if h not in hap:
                        hap[h] = 0

            # Perform LD calculations
            A = hap[sorted(hap)[0]]
            B = hap[sorted(hap)[1]]
            C = hap[sorted(hap)[2]]
            D = hap[sorted(hap)[3]]
            delta = float(A * D - B * C)
            Ms = float((A + C) * (B + D) * (A + B) * (C + D))
            if Ms != 0:
                # D prime
                if delta < 0:
                    D_prime = round(abs(delta / min((A + C) * (A + B), (B + D) * (C + D))), 3)
                else:
                    D_prime = round(abs(delta / min((A + C) * (C + D), (A + B) * (B + D))), 3)
                # R2
                r2 = round((delta ** 2) / Ms, 3)
                # Find correlated alleles
                if r2 > 0.1:
                    N = A + B + C + D
                    # Expected cell counts
                    eA = (A + B) * (A + C) / N
                    eB = (B + A) * (B + D) / N
                    eC = (C + A) * (C + D) / N
                    eD = (D + C) * (D + B) / N
                    # Calculate deltas
                    dA = (A - eA) ** 2
                    dB = (B - eB) ** 2
                    dC = (C - eC) ** 2
                    dD = (D - eD) ** 2
                    dmax = max(dA, dB, dC, dD)
                    if dmax == dA or dmax == dD:
                        match = sorted(hap)[0][0] + "=" + sorted(hap)[0][1] + "," + sorted(hap)[2][0] + "=" + sorted(hap)[1][1]
                    else:
                        match = sorted(hap)[0][0] + "=" + sorted(hap)[1][1] + "," + sorted(hap)[2][0] + "=" + sorted(hap)[0][1]
                else:
                    match = " = , = "
            else:
                D_prime = "NA"
                r2 = "NA"
                match = " = , = "

            snp1 = rsnum_lst[i]
            snp2 = rsnum_lst[j]
            pos1 = pos_lst[i].split("-")[0]
            pos2 = pos_lst[j].split("-")[0]
            allele1 = allele_lst[i]
            allele2 = allele_lst[j]
            corr = match.split(",")[0].split("=")[1] + "=" + match.split(",")[0].split("=")[0] + "," + match.split(",")[1].split("=")[1] + "=" + match.split(",")[1].split("=")[0]
            corr_f = match
            ld_matrix[i][j] = [snp1, snp2, allele1, allele2, corr, pos1, pos2, D_prime, r2]
            ld_matrix[j][i] = [snp2, snp1, allele2, allele1, corr_f, pos2, pos1, D_prime, r2]

    # Generate D' and R2 output matrices
    d_out = open(tmp_dir + "d_prime_" + request + ".txt", "w")
    r_out = open(tmp_dir + "r2_" + request + ".txt", "w")
    print >> d_out, "RS_number" + "\t" + "\t".join(rsnum_lst)
    print >> r_out, "RS_number" + "\t" + "\t".join(rsnum_lst)
    dim = len(ld_matrix)
    for i in range(dim):
        temp_d = [rsnum_lst[i]]
        temp_r = [rsnum_lst[i]]
        for j in range(dim):
            temp_d.append(str(ld_matrix[i][j][7]))
            temp_r.append(str(ld_matrix[i][j][8]))
        print >> d_out, "\t".join(temp_d)
        print >> r_out, "\t".join(temp_r)

    # Generate plot variables
    out = [j for i in ld_matrix for j in i]
    xnames = []
    ynames = []
    xA = []
    yA = []
    corA = []
    xpos = []
    ypos = []
    D = []
    R = []
    box_color = []
    box_trans = []

    if r2_d not in ["r2", "d"]:
        if "warning" in output:
            output["warning"] = output["warning"] + ". " + r2_d + " is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
        else:
            output["warning"] = r2_d + " is not an acceptable value for r2_d (r2 or d required). r2 is used by default"
        r2_d = "r2"

    for i in range(len(out)):
        snp1, snp2, allele1, allele2, corr, pos1, pos2, D_prime, r2 = out[i]
        xnames.append(snp1)
        ynames.append(snp2)
        xA.append(allele1)
        yA.append(allele2)
        corA.append(corr)
        xpos.append(pos1)
        ypos.append(pos2)
        if r2_d == "r2" and r2 != "NA":
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("red")
            box_trans.append(r2)
        elif r2_d == "d" and D_prime != "NA":
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("red")
            box_trans.append(abs(D_prime))
        else:
            D.append("NA")
            R.append("NA")
            box_color.append("blue")
            box_trans.append(0.1)

    # Import plotting modules
    from collections import OrderedDict
    from bokeh.embed import components, file_html
    from bokeh.models import HoverTool, LinearAxis, Range1d
    from bokeh.plotting import ColumnDataSource, curdoc, figure, output_file, reset_output, save
    from bokeh.resources import CDN
    from math import pi

    reset_output()

    # Aggregate plotting data
    x = []
    y = []
    w = []
    h = []
    coord_snps_plot = []
    snp_id_plot = []
    alleles_snp_plot = []
    for i in range(0, len(xpos), int(len(xpos) ** 0.5)):
        x.append(int(xpos[i].split(":")[1]) / 1000000.0)
        y.append(0.5)
        w.append(0.00003)
        h.append(1.06)
        coord_snps_plot.append(xpos[i])
        snp_id_plot.append(xnames[i])
        alleles_snp_plot.append(xA[i])

    # Generate error if less than two SNPs
    if len(x) < 2:
        output["error"] = "Less than two variants to plot."
        json_output = json.dumps(output, sort_keys=True, indent=2)
        print >> out_json, json_output
        out_json.close()
        return("", "")
        raise

    source2 = ColumnDataSource(
        data=dict(
            x=x,
            y=y,
            w=w,
            h=h,
            coord_snps_plot=coord_snps_plot,
            snp_id_plot=snp_id_plot,
            alleles_snp_plot=alleles_snp_plot,
        )
    )

    buffer = (x[-1] - x[0]) * 0.025
    xr = Range1d(start=x[0] - buffer, end=x[-1] + buffer)
    yr = Range1d(start=-0.03, end=1.03)
    y2_ll = [-0.03] * len(x)
    y2_ul = [1.03] * len(x)
    yr_pos = Range1d(start=(x[-1] + buffer) * -1, end=(x[0] - buffer) * -1)
    yr0 = Range1d(start=0, end=1)
    yr2 = Range1d(start=0, end=3.8)
    yr3 = Range1d(start=0, end=1)

    spacing = (x[-1] - x[0] + buffer + buffer) / (len(x) * 1.0)
    x2 = []
    y0 = []
    y1 = []
    y2 = []
    y3 = []
    y4 = []
    for i in range(len(x)):
        x2.append(x[0] - buffer + spacing * (i + 0.5))
        y0.append(0)
        y1.append(0.20)
        y2.append(0.80)
        y3.append(1)
        y4.append(1.15)

    xname_pos = []
    for i in x2:
        for j in range(len(x2)):
            xname_pos.append(i)

    # Matrix plot
    source = ColumnDataSource(
        data=dict(
            xname=xnames,
            xname_pos=xname_pos,
            yname=ynames,
            xA=xA,
            yA=yA,
            xpos=xpos,
            ypos=ypos,
            R2=R,
            Dp=D,
            corA=corA,
            box_color=box_color,
            box_trans=box_trans,
        )
    )

    threshold = 70
    if len(snps) < threshold:
        matrix_plot = figure(outline_line_color="white", min_border_top=0,
                             min_border_bottom=2, min_border_left=100, min_border_right=5,
                             x_range=xr, y_range=list(reversed(rsnum_lst)),
                             h_symmetry=False, v_symmetry=False, border_fill='white',
                             x_axis_type=None, logo=None,
                             tools="hover,reset,pan,box_zoom,previewsave", title=" ",
                             plot_width=800, plot_height=700)
    else:
        matrix_plot = figure(outline_line_color="white", min_border_top=0,
                             min_border_bottom=2, min_border_left=100, min_border_right=5,
                             x_range=xr, y_range=list(reversed(rsnum_lst)),
                             h_symmetry=False, v_symmetry=False, border_fill='white',
                             x_axis_type=None, y_axis_type=None, logo=None,
                             tools="hover,reset,pan,box_zoom,previewsave", title=" ",
                             plot_width=800, plot_height=700)

    matrix_plot.rect('xname_pos', 'yname', 0.95 * spacing, 0.95, source=source,
                     color="box_color", alpha="box_trans", line_color=None)
    matrix_plot.grid.grid_line_color = None
    matrix_plot.axis.axis_line_color = None
    matrix_plot.axis.major_tick_line_color = None
    if len(snps) < threshold:
        matrix_plot.axis.major_label_text_font_size = "8pt"
        matrix_plot.xaxis.major_label_orientation = "vertical"

    matrix_plot.axis.major_label_text_font_style = "normal"
    matrix_plot.xaxis.major_label_standoff = 0

    sup_2 = u"\u00B2"
    hover = matrix_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("SNP 1", " " + "@yname (@yA)"),
        ("SNP 2", " " + "@xname (@xA)"),
        ("D\'", " " + "@Dp"),
        ("R" + sup_2, " " + "@R2"),
        ("Correlated Alleles", " " + "@corA"),
    ])

    # Connector and rug plots
    # Connector plot
    if len(snps) < threshold:
        connector = figure(outline_line_color="white", y_axis_type=None,
                           x_axis_type=None, x_range=xr, y_range=yr2,
                           border_fill='white', title="", min_border_left=100,
                           min_border_right=5, min_border_top=0, min_border_bottom=0,
                           h_symmetry=False, v_symmetry=False,
                           plot_width=800, plot_height=90, tools="xpan,tap")
        connector.segment(x, y0, x, y1, color="black")
        connector.segment(x, y1, x2, y2, color="black")
        connector.segment(x2, y2, x2, y3, color="black")
        connector.text(x2, y4, text=snp_id_plot, alpha=1, angle=pi / 2,
                       text_font_size="8pt", text_baseline="middle", text_align="left")
    else:
        connector = figure(outline_line_color="white", y_axis_type=None,
                           x_axis_type=None, x_range=xr, y_range=yr3,
                           border_fill='white', title="", min_border_left=100,
                           min_border_right=5, min_border_top=0, min_border_bottom=0,
                           h_symmetry=False, v_symmetry=False,
                           plot_width=800, plot_height=30, tools="xpan,tap")
        connector.segment(x, y0, x, y1, color="black")
        connector.segment(x, y1, x2, y2, color="black")
        connector.segment(x2, y2, x2, y3, color="black")

    connector.yaxis.major_label_text_color = None
    connector.yaxis.minor_tick_line_alpha = 0  # Option does not work
    connector.yaxis.axis_label = " "
    connector.grid.grid_line_color = None
    connector.axis.axis_line_color = None
    connector.axis.major_tick_line_color = None
    connector.axis.minor_tick_line_color = None
    connector.toolbar_location = None

    # Rug plot
    rug = figure(x_range=xr, y_range=yr, y_axis_type=None, title="",
                 min_border_top=1, min_border_bottom=0, min_border_left=100,
                 min_border_right=5, h_symmetry=False, v_symmetry=False,
                 plot_width=800, plot_height=50, tools="hover,xpan,tap")
    rug.rect(x, y, w, h, source=source2, fill_color="red", dilate=True,
             line_color=None, fill_alpha=0.6)
    hover = rug.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("SNP", "@snp_id_plot (@alleles_snp_plot)"),
        ("Coord", "@coord_snps_plot"),
    ])
    rug.toolbar_location = None

    # Gene plot
    tabix_gene = "tabix -fh {0} {1}:{2}-{3} > {4}".format(
        gene_dir, snp_coords[1][1], int((x[0] - buffer) * 1000000),
        int((x[-1] + buffer) * 1000000), tmp_dir + "genes_" + request + ".txt")
    subprocess.call(tabix_gene, shell=True)
    filename = tmp_dir + "genes_" + request + ".txt"
    genes_raw = open(filename).readlines()

    genes_plot_start = []
    genes_plot_end = []
    genes_plot_y = []
    genes_plot_name = []
    exons_plot_x = []
    exons_plot_y = []
    exons_plot_w = []
    exons_plot_h = []
    exons_plot_name = []
    exons_plot_id = []
    exons_plot_exon = []
    lines = [0]
    gap = 80000
    tall = 0.75
    if genes_raw != None:
        for i in range(len(genes_raw)):
            bin, name_id, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, score, name2, cdsStartStat, cdsEndStat, exonFrames = genes_raw[i].strip().split()
            name = name2
            id = name_id
            e_start = exonStarts.split(",")
            e_end = exonEnds.split(",")

            # Determine Y coordinate
            i = 0
            y_coord = None
            while y_coord == None:
                if i > len(lines) - 1:
                    y_coord = i + 1
                    lines.append(int(txEnd))
                elif int(txStart) > (gap + lines[i]):
                    y_coord = i + 1
                    lines[i] = int(txEnd)
                else:
                    i += 1

            genes_plot_start.append(int(txStart) / 1000000.0)
            genes_plot_end.append(int(txEnd) / 1000000.0)
            genes_plot_y.append(y_coord)
            genes_plot_name.append(name + " ")

            for i in range(len(e_start) - 1):
                if strand == "+":
                    exon = i + 1
                else:
                    exon = len(e_start) - 1 - i
                width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                x_coord = int(e_start[i]) / 1000000.0 + (width / 2)
                exons_plot_x.append(x_coord)
                exons_plot_y.append(y_coord)
                exons_plot_w.append(width)
                exons_plot_h.append(tall)
                exons_plot_name.append(name)
                exons_plot_id.append(id)
                exons_plot_exon.append(exon)

    n_rows = len(lines)
    genes_plot_yn = [n_rows - w + 0.5 for w in genes_plot_y]
    exons_plot_yn = [n_rows - w + 0.5 for w in exons_plot_y]
    yr2 = Range1d(start=0, end=n_rows)

    source2 = ColumnDataSource(
        data=dict(
            exons_plot_name=exons_plot_name,
            exons_plot_id=exons_plot_id,
            exons_plot_exon=exons_plot_exon,
        )
    )

    max_genes = 40
    if len(lines) < 3 or len(genes_raw) > max_genes:
        plot_h_pix = 150
    else:
        plot_h_pix = 150 + (len(lines) - 2) * 50

    gene_plot = figure(min_border_top=2, min_border_bottom=0, min_border_left=100,
                       min_border_right=5, x_range=xr, y_range=yr2,
                       border_fill='white', title="", h_symmetry=False,
                       v_symmetry=False, logo=None, plot_width=800,
                       plot_height=plot_h_pix,
                       tools="hover,xpan,box_zoom,wheel_zoom,tap,reset,previewsave")

    if len(genes_raw) <= max_genes:
        gene_plot.segment(genes_plot_start, genes_plot_yn, genes_plot_end,
                          genes_plot_yn, color="black", alpha=1, line_width=2)
        gene_plot.rect(exons_plot_x, exons_plot_yn, exons_plot_w, exons_plot_h,
                       source=source2, fill_color="grey", line_color="grey")
        gene_plot.text(genes_plot_start, genes_plot_yn, text=genes_plot_name,
                       alpha=1, text_font_size="7pt", text_font_style="bold",
                       text_baseline="middle", text_align="right", angle=0)
    else:
        x_coord_text = x[0] + (x[-1] - x[0]) / 2.0
        gene_plot.text(x_coord_text, n_rows / 2.0, text="Too many genes to plot.",
                       alpha=1, text_font_size="12pt", text_font_style="bold",
                       text_baseline="middle", text_align="center", angle=0)

    gene_plot.xaxis.axis_label = "Chromosome " + snp_coords[1][1] + " Coordinate (Mb)(GRCh37)"
    gene_plot.yaxis.axis_label = "Genes"
    gene_plot.ygrid.grid_line_color = None
    gene_plot.yaxis.axis_line_color = None
    gene_plot.yaxis.minor_tick_line_color = None
    gene_plot.yaxis.major_tick_line_color = None
    gene_plot.yaxis.major_label_text_color = None

    hover = gene_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Gene", "@exons_plot_name"),
        ("ID", "@exons_plot_id"),
        ("Exon", "@exons_plot_exon"),
    ])

    gene_plot.toolbar_location = "below"

    out_script, out_div = components(curdoc(), CDN)
    reset_output()

    # Return output
    json_output = json.dumps(output, sort_keys=True, indent=2)
    print >> out_json, json_output
    out_json.close()
    return(out_script, out_div)
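# Worked, self-contained sketch of the pairwise LD arithmetic in
# calculate_matrix above: from the four haplotype counts A, B, C, D compute
# delta, D' and r^2. The counts below are invented to illustrate the formulas.
def ld_stats(A, B, C, D):
    delta = float(A * D - B * C)
    Ms = float((A + C) * (B + D) * (A + B) * (C + D))
    if Ms == 0:
        return None, None
    if delta < 0:
        d_prime = abs(delta / min((A + C) * (A + B), (B + D) * (C + D)))
    else:
        d_prime = abs(delta / min((A + C) * (C + D), (A + B) * (B + D)))
    r2 = (delta ** 2) / Ms
    return round(d_prime, 3), round(r2, 3)

# e.g. 60 AB, 10 Ab, 10 aB, 20 ab haplotypes:
print(ld_stats(60, 10, 10, 20))  # -> (0.524, 0.274)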
def plotzipcomplaints(self, mapPoints, dat):
    """This method draws a circle for each zip code in NYC. The size of the
    circle is proportional to the number of complaints in the zip code."""
    reset_output()

    numberOfComplaints = []
    polygons = {'lat_list': [], 'lng_list': [], 'radius_list': []}
    X = []
    Y = []
    zipCodes = []
    record_index = 0

    for r in dat.iterRecords():
        currentZip = r[0]
        intzip = int(currentZip)
        # Keeps only zip codes in the NY area.
        if intzip in self.zipBoroughdata:
            zipCodes.append(intzip)
            # Gets the shape for this zip.
            shape = dat.shapeRecord(record_index).shape
            points = shape.points
            # Breaks into lists for lat/lng.
            lngs = [p[0] for p in points]
            lats = [p[1] for p in points]
            # Stores lat/lng for the current zip shape.
            polygons['lng_list'].append(lngs)
            polygons['lat_list'].append(lats)
            zip_box = shape.bbox
            lng_avg = (zip_box[0] + zip_box[2]) / 2
            lat_avg = (zip_box[1] + zip_box[3]) / 2
            X.append(lng_avg)
            Y.append(lat_avg)
            # Looks up the number of complaints for this zip.
            if currentZip in mapPoints['zip_complaints']:
                numberOfComplaints.append(mapPoints['zip_complaints'][currentZip])
            else:
                # Keeps X, Y and the radius list the same length.
                numberOfComplaints.append(0)
        record_index += 1

    maxNumComplaints = max(numberOfComplaints)
    # Scales each circle radius by the maximum complaint count.
    for i in numberOfComplaints:
        polygons['radius_list'].append(i / (maxNumComplaints * float(100)))

    # Creates the plot
    output_file("plotZipComplaints.html", title="ZipComplaints")
    TOOLS = "pan,wheel_zoom,box_zoom,reset,previewsave"

    # Creates the polygons.
    patches(polygons['lng_list'], polygons['lat_list'],
            fill_color='white', line_color="gray",
            tools=TOOLS, plot_width=1100, plot_height=700,
            title="Radius of circle according to the Number of Complaints in the Zip Code")
    hold()
    scatter(X, Y, fill_color='red', color='red',
            radius=polygons['radius_list'], alpha=0.6, tools=TOOLS)
    show()
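# Minimal modern-API sketch of the proportional-circle idea above: scale each
# count by the maximum so radii stay in a sensible range, then scatter them.
# Coordinates and counts are invented; the original divides by max * 100
# because it works in lat/lng degrees, while this demo uses plain data units.
from bokeh.plotting import figure, output_file, reset_output, save

reset_output()
output_file("circle_sizes.html")
counts = [120, 45, 300, 80]
xs = [0, 1, 2, 3]
ys = [0, 1, 0, 1]
max_count = max(counts)
radii = [0.3 * c / max_count for c in counts]  # largest circle has radius 0.3
p = figure(width=400, height=300)
p.circle(xs, ys, radius=radii, fill_color='red', alpha=0.6)
save(p)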
if len(catalog[entry]['sources']):
    html = re.sub(r'(\<\/body\>)',
                  r'<em>Sources of data:</em><br><table><tr><th width=30px>ID</th><th>Source</th></tr>\n\1',
                  html)
    for source in catalog[entry]['sources']:
        html = re.sub(r'(\<\/body\>)',
                      r'<tr><td>' + source['alias'] + r'</td><td>' +
                      (('<a href="' + source['url'] + '">') if 'url' in source else '') +
                      source['name'].encode('ascii', 'xmlcharrefreplace').decode("utf-8") +
                      (r'</a>' if 'url' in source else '') + r'</td></tr>\n\1',
                      html)
    html = re.sub(r'(\<\/body\>)', r'</table>\n\1', html)

html = re.sub(r'(\<\/body\>)', returnlink + r'\n\1', html)

print(outdir + eventname + ".html")
with open(outdir + eventname + ".html", "w") as fff:
    fff.write(html)

# Necessary to clear Bokeh state
reset_output()

# Save this stuff because the next line will delete it.
if args.writecatalog:
    if 'photoplot' in catalog[entry]:
        tdepages.append(catalog[entry]['aliases'] +
                        ['https://tde.space/' + catalog[entry]['photoplot']])
    if 'sources' in catalog[entry]:
        for sourcerow in catalog[entry]['sources']:
            strippedname = re.sub('<[^<]+?>', '',
                                  sourcerow['name'].encode('ascii', 'xmlcharrefreplace').decode("utf-8"))
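# Standalone sketch of the injection technique used above: repeatedly re.sub
# on the closing </body> tag with a backreference, so each insertion lands
# just before it and successive insertions stack in document order. The
# catalog row below is invented for the demo.
import re

html = "<html><body><p>light curve</p></body></html>"
sources = [{'alias': '1', 'name': 'Example Survey', 'url': 'http://example.org'}]

html = re.sub(r'(</body>)', r'<table><tr><th>ID</th><th>Source</th></tr>\n\1', html)
for s in sources:
    link = '<a href="' + s['url'] + '">' if 'url' in s else ''
    row = ('<tr><td>' + s['alias'] + '</td><td>' + link + s['name'] +
           ('</a>' if 'url' in s else '') + '</td></tr>\n')
    html = re.sub(r'(</body>)', row + r'\1', html)
html = re.sub(r'(</body>)', r'</table>\n\1', html)
print(html)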
def comparetowagencies(self, mapPoints, dat):
    """This method creates an analogous map for NYC to compare two agencies
    in terms of the number of complaints for each zip code."""
    reset_output()

    polygons = {'lat_list': [], 'lng_list': [], 'color_list': []}  # one entry per zip shape
    color = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"]
    ratios = []
    agency_names = []
    zipCodes = []
    record_index = 0

    for r in dat.iterRecords():
        currentZip = r[0]
        intzip = int(currentZip)
        if intzip in self.zipBoroughdata:
            zipCodes.append(intzip)
            shape = dat.shapeRecord(record_index).shape
            points = shape.points
            lngs = [p[0] for p in points]
            lats = [p[1] for p in points]
            polygons['lng_list'].append(lngs)
            polygons['lat_list'].append(lats)
            # Calculates the ratio of complaint counts between the two agencies.
            if currentZip in mapPoints['zip_complaints']:
                sortedlist = sorted(mapPoints['zip_complaints'][currentZip].items(),
                                    key=operator.itemgetter(0))
                if (sortedlist[0][1] + sortedlist[1][1]) == 0:
                    ratios.append('NA')
                else:
                    # Maps the ratio onto a discrete palette index.
                    calculate_each_ratio = int(floor(
                        (float(sortedlist[0][1]) / (sortedlist[0][1] + sortedlist[1][1]))
                        * (len(color) - 1)))
                    ratios.append(calculate_each_ratio)
        record_index += 1

    # Maps each ratio to a colour once all records have been processed.
    for i in ratios:
        if i == 'NA':
            polygons['color_list'].append('white')
        else:
            polygons['color_list'].append(color[int(i)])

    agency_names.append(sortedlist[1][0])
    agency_names.append(sortedlist[0][0])

    output_file("CompareTwoAgencies.html", title="CompareTwoAgencies")
    TOOLS = "pan,wheel_zoom,box_zoom,reset,previewsave,hover"

    source = ColumnDataSource(data=dict(zipCodes=zipCodes))

    # Creates the polygons.
    patches(polygons['lng_list'], polygons['lat_list'],
            fill_color=polygons['color_list'], line_color="gray",
            tools=TOOLS, plot_width=1100, plot_height=700,
            title="Ratio of Number of Complaints of selected agencies according to Zip Code",
            source=source)
    hover = curplot().select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([("Zip Code", "@zipCodes")])
    hold()

    # Draws a small colour-bar legend in the corner of the map.
    x, y1 = -74.2, 40.77
    y2 = 40.765
    for i, c in enumerate(color):
        rect([x + 0.01], [y1], color=c, width=0.01, height=.02)
        y1 = y1 + .01
    ratio_values = ['100% ', '100% ']
    for i, agency in enumerate(agency_names):
        text([x], [y2], text=ratio_values[i] + agency, angle=0,
             text_font_size="8pt", font_weight='bold',
             text_align="right", text_baseline="middle")
        y2 = y2 + 0.08
    show()
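# Small sketch of the ratio-to-palette mapping used in comparetowagencies
# above: a share in [0, 1] is floored onto an index into a fixed six-colour
# palette, with white reserved for zips that have no data. Counts are invented
# for the demo.
from math import floor

color = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"]

def shade_for(count_a, count_b):
    total = count_a + count_b
    if total == 0:
        return 'white'  # no complaints recorded for this zip code
    idx = int(floor(float(count_a) / total * (len(color) - 1)))
    return color[idx]

print(shade_for(30, 70))  # -> '#D4B9DA' (light shade)
print(shade_for(90, 10))  # -> '#DD1C77' (dark shade)
print(shade_for(0, 0))    # -> 'white'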