def test_bbox_inches_tight():
    """Test that a figure saved using bbox_inches='tight' is clipped correctly."""
    losses = [
        [66386, 174296, 75131, 577908, 32015],
        [58230, 381139, 78045, 99308, 160454],
        [89135, 80552, 152558, 497981, 603535],
        [78415, 81858, 150656, 193263, 69638],
        [139361, 331509, 343164, 781380, 52269],
    ]
    # The same list of blank strings labels both the rows and the columns.
    blank_labels = [''] * 5

    x_positions = np.arange(len(blank_labels)) + 0.3  # x locations of the groups
    bar_width = 0.4
    bottoms = np.zeros(len(blank_labels))  # running top of the stacked bars

    fig, ax = plt.subplots(1, 1)
    table_cells = []
    for series in losses:
        ax.bar(x_positions, series, bar_width, bottom=bottoms, align='edge',
               color='b')
        bottoms = bottoms + series
        table_cells.append([''])

    plt.xticks([])
    plt.xlim(0, 5)
    plt.legend([''] * 5, loc=(1.2, 0.2))

    # Table below the axes, listing the last stacked series first.
    table_cells.reverse()
    plt.table(cellText=table_cells,
              rowLabels=blank_labels,
              colLabels=blank_labels,
              loc='bottom')
def test_zorder():
    """Draw a cosine curve with one table below it and one above it in z-order."""
    data = [[66386, 174296],
            [58230, 381139]]
    col_names = ('Freeze', 'Wind')
    row_names = ['%d year' % years for years in (100, 50)]

    # Accumulate the rows from last to first; each table row shows the
    # running total in thousands.
    running = np.zeros(len(col_names))
    cell_rows = []
    for series in data[::-1]:
        running += series
        cell_rows.append(['%1.1f' % (total / 1000.0) for total in running])

    t = np.linspace(0, 2*np.pi, 100)
    plt.plot(t, np.cos(t), lw=4, zorder=2)

    # Same table twice: once with a zorder below the line, once above it.
    for location, order in (('center', -2), ('upper center', 4)):
        plt.table(cellText=cell_rows,
                  rowLabels=row_names,
                  colLabels=col_names,
                  loc=location,
                  zorder=order,
                  )
    plt.yticks([])
def plot(self):
    """Draw the simple-event and external-event stack plots with legend tables.

    Reads and scales the events, then lays out a two-row grid: each row has
    a stack plot on the left and, on the right, a table that serves as the
    legend (row colours match the stack colours) and lists each series'
    share of ticks. The figure is shown with ``plt.show()``.
    """
    # Read events.
    self.read_events()
    self.scale(self.scale_factor)

    # Set the plot size. Floor division keeps the grid dimensions integral
    # on Python 3 as well (``/`` would produce a float there, which is not
    # a valid grid size or colspan).
    grid_row = 2
    grid_fig_col = self.num_simple_events // 2
    grid_legend_col = 8
    grid_col = grid_fig_col + grid_legend_col
    plt.figure(figsize=(grid_col, grid_row * 6))

    # Plot simple events.
    plt.subplot2grid((grid_row, grid_col), (0, 0), colspan=grid_fig_col)
    x = np.arange(self.num_simple_events)
    colors = self.get_colors(len(V8_STATES_PLOT))
    plt.stackplot(x, [self.data[key] for key in V8_STATES_PLOT],
                  colors=colors)
    plt.xlim(xmin=0, xmax=self.num_simple_events - 1)
    plt.ylim(ymin=0, ymax=self.sampling_period)

    # Draw legend for the simple events (rows listed in reverse stack order).
    plt.subplot2grid((grid_row, grid_col), (0, grid_col - 1))
    total_ticks = self.num_simple_events * self.sampling_period
    plt.table(cellText=[[str(100 * sum(self.data[key]) / total_ticks) + ' %']
                        for key in reversed(V8_STATES_PLOT)],
              rowLabels=V8_STATES_PLOT[::-1],
              rowColours=colors[::-1],
              colLabels=['Ticks'],
              loc='center')
    plt.xticks([])
    plt.yticks([])

    # Plot external events.
    plt.subplot2grid((grid_row, grid_col), (1, 0), colspan=grid_fig_col)
    x = np.arange(self.num_external_events)
    colors = self.get_colors(len(EXTERNAL_DETAILS))
    plt.stackplot(x, [self.data_external[key] for key in EXTERNAL_DETAILS],
                  colors=colors)
    plt.xlim(xmin=0, xmax=self.num_external_events - 1)
    plt.ylim(ymin=0, ymax=self.sampling_period)

    # Draw legend for the external events.
    plt.subplot2grid((grid_row, grid_col), (1, grid_col - 3), colspan=3)
    # One extra tick is added per key, which keeps the divisor non-zero
    # even when every external series is empty.
    total_ticks = 0
    for key in EXTERNAL_DETAILS:
        total_ticks += sum(self.data_external[key]) + 1
    plt.table(cellText=[[str(100 * sum(self.data_external[key]) / total_ticks) + ' %',
                         str(sum(self.num_external[key]))]
                        for key in reversed(EXTERNAL_DETAILS)],
              rowLabels=EXTERNAL_DETAILS[::-1],
              rowColours=colors[::-1],
              colLabels=['Ticks', '# of Times'],
              loc='center')
    plt.xticks([])
    plt.yticks([])

    # Finally draw the plot.
    plt.tight_layout()
    plt.show()
def plotTable(inData):
    """Render ``inData`` as a table on a new 10x5 figure.

    The first entry of ``inData`` supplies the column labels; every later
    entry is one table row, labelled with its zero-based position.
    """
    fig = plt.figure(figsize=(10, 5))
    plt.axis('off')
    plt.tight_layout()

    body_rows = list(inData[1:])
    row_numbers = range(len(inData) - 1)
    plt.table(cellText=body_rows,
              loc='center',
              rowLabels=row_numbers,
              colLabels=inData[0])
def plotTable(inData):
    """Render ``inData`` as a table on a new 20x10 figure.

    The first entry of ``inData`` supplies the column labels. In every later
    entry the first element is the row label and the rest are the cells.
    """
    fig = plt.figure(figsize=(20, 10))
    plt.axis('off')
    plt.tight_layout()

    body_rows = inData[1:]
    row_names = []
    cells = []
    for record in body_rows:
        cells.append(record[1:])
    for record in body_rows:
        row_names.append(record[0])
    plt.table(cellText=cells,
              loc='center',
              rowLabels=row_names,
              colLabels=inData[0])
def reportwin(namel, report, reportl):
    """Write the report text to disk and save a picture of it as a table.

    Args:
        namel: column labels for the table.
        report: text written verbatim to ``<direktorij>/report.tex``.
        reportl: table cell contents (one sequence per row).

    Both outputs go into the module-level ``direktorij`` directory; the
    picture is saved as ``report.png``.
    """
    # The context manager closes the file even if the write fails.
    with open(str(direktorij + '/report.tex'), 'w') as save_report:
        save_report.write(report)

    plt.figure(figsize=(4, 3))
    plt.axis('off')
    plt.table(cellText=reportl, colLabels=namel, loc='center')
    plt.savefig(str(direktorij + '/report.png'))
    plt.close()
def visualize_clf(file_path):
    """Plot histograms and a cluster-size table for pickled results, per subject.

    Finds pickle files under ``file_path`` whose names contain "14", groups
    them by the part of the file name before the first underscore, and for
    each group saves ``<group>.png`` with one histogram per file plus a
    table of the largest connected-cluster size at each threshold.

    Args:
        file_path: directory that is globbed for ``**/*14*.p`` files.
    """
    ext_pattern = "14"
    int_pattern = "23"
    path = "{}/**/*{}*.p".format(file_path, ext_pattern)
    files = glob(path)
    print(files)
    thresholds = np.arange(0.65, 1, 0.05)

    # Group (file, counterpart file) pairs by subject prefix.
    file_dict = dict()
    for f in files:
        filename = f[f.rfind('/')+1:]
        sub = filename[:filename.find('_')]
        pair = (f, f.replace(ext_pattern, int_pattern))
        print(pair)
        file_dict.setdefault(sub, []).append(pair)
    print(file_dict)

    for sub, file_list in file_dict.items():
        fig = plt.figure()
        cell_text = []
        col_labels = []
        file_list = sorted(file_list)
        for i, pair in enumerate(file_list):
            print(pair)
            f = pair[0]
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only use this on trusted result files.
            with open(f, 'rb') as handle:
                sl = pickle.load(handle)
            data = sl.samples[0]
            fig.add_subplot(4, 4, i+1)
            title = f[f.find('-')+1:]
            plt.title(title)
            col_labels.append(title)
            plt.hist(data)
            print(title)

            # The reverse-mapped volume does not depend on the threshold,
            # so compute it once per file instead of once per threshold.
            data_3d = sl.a.mapper.reverse1(sl.samples)[0]
            coltext = []
            for thr in thresholds:
                cluster_map, n_clusters = ndimage.label(data_3d > thr)
                # Drop label 0 (background) before looking for the largest.
                cluster_sizes = np.bincount(cluster_map.ravel())[1:]
                if len(cluster_sizes) != 0:
                    coltext.append("{}".format(np.max(cluster_sizes)))
                else:
                    coltext.append("0")
            cell_text.append(coltext)

        # NOTE(review): the table slot is derived from the total number of
        # files, not this subject's file count -- confirm this stays within
        # the 4x4 grid for the expected inputs.
        ax = fig.add_subplot(4, 4, len(files)+2)
        ax.axis('off')
        print(len(cell_text))
        plt.table(cellText=cell_text, rowLabels=col_labels,
                  colLabels=thresholds, loc='center right')
        plt.savefig('{}.png'.format(sub))
def output_table(celltext, title, col_labels, filename, fig_size, pos_y, col_width):
    """Save a frameless table image with custom fonts and column widths.

    Args:
        celltext: table body, one sequence of cell strings per row.
        title: axes title drawn with the custom font.
        col_labels: header labels, one per column.
        filename: output path handed to ``plt.savefig``.
        fig_size: figure size passed to ``plt.figure``.
        pos_y: vertical position of the title (``y`` of ``set_title``).
        col_width: width for each column, applied to the header and body.
    """
    prop = matplotlib.font_manager.FontProperties(fname=r'MTLmr3m.ttf', size=14.5)
    fig = plt.figure(figsize=fig_size)
    ax = fig.add_subplot(111)
    ax.set_title(title, y=pos_y, fontproperties=prop)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    # Paint the frame white and push it behind everything else.
    # ``.values()`` works on both Python 2 and Python 3.
    for sp in ax.spines.values():
        sp.set_color('w')
        sp.set_zorder(0)

    the_table = plt.table(cellText=celltext,
                          colLabels=col_labels,
                          loc='center')
    cells = the_table.get_celld()
    # Row 0 is the header row, hence the +1.
    for i in range(len(celltext) + 1):
        for k, width in enumerate(col_width):
            cells[(i, k)].set_width(width)
    for cell in cells.values():
        cell.set_text_props(fontproperties=prop)

    the_table.auto_set_font_size(False)
    the_table.set_fontsize(11.5)
    plt.savefig(filename)
def test_bbox_inches_tight():
    """Test that a figure saved using bbox_inches='tight' is clipped right."""
    rcParams.update(rcParamsDefault)

    data = [[66386, 174296, 75131, 577908, 32015],
            [58230, 381139, 78045, 99308, 160454],
            [89135, 80552, 152558, 497981, 603535],
            [78415, 81858, 150656, 193263, 69638],
            [139361, 331509, 343164, 781380, 52269]]

    colLabels = ('Freeze', 'Wind', 'Flood', 'Quake', 'Hail')
    rowLabels = ['%d year' % x for x in (100, 50, 20, 10, 5)]

    ind = np.arange(len(colLabels)) + 0.3  # the x locations for the groups
    cellText = []
    width = 0.4  # the width of the bars
    # the bottom values for stacked bar chart
    yoff = np.array([0.0] * len(colLabels))

    fig, ax = plt.subplots(1, 1)
    # Iterating the rows directly avoids the Python 2-only ``xrange``.
    for row in data:
        plt.bar(ind, row, width, bottom=yoff)
        yoff = yoff + row
        cellText.append(['%1.1f' % (x / 1000.0) for x in yoff])
    plt.xticks([])
    plt.legend(['1', '2', '3', '4', '5'], loc=(1.2, 0.2))

    # Add a table at the bottom of the axes
    cellText.reverse()
    plt.table(cellText=cellText,
              rowLabels=rowLabels,
              colLabels=colLabels,
              loc='bottom')
def plot_aggregate_results(wf_name, data):
    """Histogram the failed-task counts of one workflow with a summary table.

    Args:
        wf_name: key selecting the workflow's records in ``data``.
        data: mapping from workflow name to records; each record provides
            ``record["result"]["overall_failed_tasks_count"]`` and
            ``record["result"]["makespan"]``.

    Raises:
        KeyError: if a record's failed-task count is not one of 0..15.
    """
    def aggr(results):
        # Aggregate a group of makespans; an empty group counts as [0.0].
        return int(interval_statistics(results if len(results) > 0 else [0.0])[0])

    records = data[wf_name]
    fail_counts = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

    # Makespans grouped by the number of failed tasks.
    makespans_by_count = {count: [] for count in fail_counts}
    for record in records:
        fcount = record["result"]["overall_failed_tasks_count"]
        makespan = record["result"]["makespan"]
        makespans_by_count[fcount].append(makespan)

    # One histogram sample per run, valued by its failed-task count.
    samples = [count
               for count in fail_counts
               for _ in makespans_by_count[count]]

    plt.grid(True)
    _, bin_edges, patches = pylab.hist(samples, fail_counts, histtype='stepfilled')
    pylab.setp(patches, 'facecolor', 'g', 'alpha', 0.75)

    aggregated = [aggr(makespans_by_count[count]) for count in fail_counts]
    rows = [[str(v) for v in aggregated]]
    # Column labels are the bin edges returned by hist().
    plt.table(cellText=rows,
              rowLabels=None,
              colLabels=bin_edges,
              loc='bottom')
def _create_summary_table(series_map, bins=None, **kwargs):
    """Add a per-group summary table (total, non-null, % shown) to the plot.

    Args:
        series_map: mapping from group label to a pandas Series.
        bins: optional bin edges; values outside ``[min(bins), max(bins)]``
            are counted as not shown, in addition to nulls.
        **kwargs: forwarded to ``_make_table_pretty``.

    Returns:
        The matplotlib table that was created.
    """
    rows = []
    row_labels = []
    column_labels = ['Total', 'Not Null', '% Shown']
    # ``.items()`` works on both Python 2 and Python 3 mappings.
    for group, srs in series_map.items():
        total_num = len(srs)
        not_null = len(srs[pd.notnull(srs)])
        hidden = pd.isnull(srs)
        if bins is not None:
            hidden = hidden | (srs > max(bins)) | (srs < min(bins))
        not_shown = len(srs[hidden])
        # Multiply by the float first so the ratio is never truncated by
        # integer division (Python 2 would otherwise yield only 0 or 100).
        if total_num > 0:
            percent_shown = 100.0 * (total_num - not_shown) / total_num
        else:
            percent_shown = 0
        pct_string = "{number:.{digits}f}%".format(number=percent_shown, digits=1)
        row_labels.append(group)
        rows.append([total_num, not_null, pct_string])

    table = plt.table(cellText=rows,
                      rowLabels=row_labels,
                      colLabels=column_labels,
                      colWidths=[0.08] * 3,
                      loc='upper center')
    _make_table_pretty(table, **kwargs)
    return table
def __init__(self):
    """Build the 9x9 board figure, solve the initial state and hook up keys."""
    self.fig = plt.figure(figsize=(5, 5))
    ax = self.fig.add_subplot(1, 1, 1)
    ax.set_aspect("equal")
    ax.set_axis_off()
    self.fig.subplots_adjust(0.0, 0.0, 1, 1)

    # 9x9 grid whose row i is filled with the digit i+1.
    data = np.repeat(np.arange(1, 10)[:, None], 9, axis=1)
    table = plt.table(cellText=data, loc="center", cellLoc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(20)

    # Thick separator lines at 0.05, 0.35, 0.65 and 0.95 (axes coordinates),
    # drawn both vertically and horizontally.
    for v in np.arange(0.05, 1, 0.3):
        line1 = plt.Line2D([v, v], [0.05, 0.95], lw=2, color="k")
        line2 = plt.Line2D([0.05, 0.95], [v, v], lw=2, color="k")
        for line in (line1, line2):
            line.set_transform(ax.transAxes)
            ax.add_artist(line)

    # Use the public accessor instead of the private ``_cells`` attribute;
    # ``.values()`` works on both Python 2 and Python 3.
    self.cells = table.get_celld()
    for cell in self.cells.values():
        cell.set_width(0.1)
        cell.set_height(0.1)
        cell.set_edgecolor("#AAAAAA")

    self.current_pos = (0, 0)
    self.set_current_cell((0, 0))
    self.setted_cells = {}
    self.solver = SudokuSolver()
    self.calc_solution()
    self.fig.canvas.mpl_connect("key_press_event", self.on_key)
def main():
    """Save a table of the non-sRNA entries of group_28 to ``test.png``.

    Reads the tab-separated file named by ``args.input_file`` and keeps
    columns 1 and 2 of every row whose first column is "group_28" and whose
    third column is not "sRNA".
    """
    tables = []
    # The context manager guarantees the input file is closed. The unused
    # GO-term list that was derived from column 3 of each row is no longer
    # built, because nothing read it.
    with open(args.input_file, "r") as fh:
        for row in csv.reader(fh, delimiter='\t'):
            if (row[2] != "sRNA") and (row[0] == "group_28"):
                tables.append([row[1], row[2]])

    plt.figure(figsize=(25, 10))
    columns = ["name", "number"]
    plt.table(cellText=tables,
              colLabels=columns,
              loc='bottom')
    plt.savefig("test.png")
def __init__(self, _title, _ylabel, row_labels, col_labels, table_data, save_fn=None):
    """Draw a stacked bar chart with a matching data table underneath.

    Args:
        _title: plot title.
        _ylabel: y-axis label.
        row_labels: one label per row of ``table_data``.
        col_labels: one label per column of ``table_data``.
        table_data: list of numeric rows; stacked from the last row upwards.
        save_fn: if given, the figure is also saved as
            ``<save_dir>/<save_fn>.pdf`` (``save_dir`` is a module global).
    """
    assert len(table_data) == len(row_labels)
    assert len(table_data[0]) == len(col_labels)
    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(111)

    bar_width = 0.5
    # ``range`` works on both Python 2 and Python 3 (``xrange`` does not).
    ind = [bar_width / 2 + i for i in range(len(col_labels))]

    # Stack the bars bottom-up, starting with the last table row, so the
    # visual order matches the table below the chart.
    bar_data = table_data[:]
    bar_data.reverse()
    y_offset = np.array([0.0] * len(col_labels))
    for i, row_data in enumerate(bar_data):
        plt.bar(ind, row_data, bar_width, bottom=y_offset, color=clists[i])
        y_offset = y_offset + row_data
    ax.set_xlim(0, len(ind))

    # Thousands separators for the table cells.
    formated_table_data = [['{:,}'.format(x) for x in r] for r in table_data]
    table = plt.table(cellText=formated_table_data,
                      colLabels=col_labels,
                      rowLabels=row_labels,
                      loc='bottom')
    table.scale(1, 2)

    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.ylabel(_ylabel)
    ax.yaxis.set_major_formatter(tkr.FuncFormatter(comma_formating))
    plt.xticks([])
    plt.title(_title)
    if save_fn:
        plt.savefig('%s/%s.pdf' % (save_dir, save_fn))
    plt.show()
def to_PNG(self, OutputName='TLD.png', title='Trip-Length Distribution',
           ylabel='Trips', units='', legend=False, table=False,
           table_font_colors=True, prefixes='', suffixes='', *args, **kwargs):
    '''Produces a graph from TLD, all columns together. Includes average distance.

    OutputName - path of the image file to write.
    title, ylabel - plot title and y-axis label.
    units - if given, shown in the header of the average-distance table.
    legend - draw a legend below the plot, one column per data column.
    table - add a table with the average distance of each column.
    table_font_colors - colour each table row label like its plot line.
    prefixes - to prepend to each column. Use as a marker.
    suffixes - to append to each column. Use as a marker.

    Raises ValueError if renaming the columns fails or yields duplicates.
    Note that the columns of the object itself are renamed in place.
    '''
    if prefixes:
        try:
            self.columns = [prefix+col for col, prefix in zip(self.columns, prefixes)]
        except Exception:
            raise ValueError("prefixes must have the same length as df.columns.")
    if suffixes:
        try:
            self.columns = [col+sufix for col, sufix in zip(self.columns, suffixes)]
        except Exception:
            raise ValueError("suffixes must have the same length as df.columns.")

    if duplicates_in_list(self.columns):
        raise ValueError("Duplicate names in DataFrame's columns.")

    plt.clf()
    axs_subplot = self.plot(title=title, legend=legend)
    line_colors = [line.get_color() for line in axs_subplot.lines]
    if legend:
        # One legend column per plotted column of this object (the count was
        # previously read from the class instead of the instance).
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1),
                   fancybox=True, ncol=len(self.columns))
    plt.xlabel('Dist')
    plt.ylabel(ylabel)

    if units:
        col_label = 'Avg Dist ({})'.format(units)
    else:
        col_label = 'Avg Dist'

    if table:
        avg_table = plt.table(
            cellText=[['{:,.2f}'.format(dist)] for dist in list(self.avgdist)],
            colWidths=[0.1],
            rowLabels=[' {} '.format(col) for col in self],
            colLabels=[col_label],
            loc='upper right')
        avg_table.scale(2, 2)

        if table_font_colors:
            # Row-label cells live in column -1; row 0 is the header.
            cells = avg_table.get_celld()
            for i, color in enumerate(line_colors):
                cells[(i+1, -1)].set_text_props(color=color)

    plt.savefig(OutputName, bbox_inches='tight')
    plt.close()
def plot_results(x_axis, y_axis, x_min, x_max, labels):
    """Plot recognition-rate data points, their per-group means and a table.

    Args:
        x_axis: x values for the data points.
        y_axis: one data set (1-D) or a sequence of data sets (2-D). Each
            data set is read as repeated runs over the groups
            ``x_min..x_max``.
        x_min: first group value on the x axis.
        x_max: last group value on the x axis.
        labels: one legend/table label per data set.
    """
    try:
        y_axis[0][0]
    except (IndexError, TypeError):
        # Convert 1D input to 2D. A NumPy scalar raises IndexError when
        # indexed; a plain Python number raises TypeError.
        y_axis = [y_axis]

    colors = ('blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black')
    group_count = x_max - x_min + 1

    # Mean of every group: element k of each run belongs to group k.
    y_axis_means = []
    for dataset in y_axis:
        dataset_mean = []
        for group_no in range(group_count):
            group = dataset[group_no::group_count]
            dataset_mean.append(sum(group) / len(group))
        y_axis_means.append(dataset_mean)

    fig, ax = plt.subplots()

    # Plot datapoints
    for color, label, dataset in zip(colors, labels, y_axis):
        ax.plot(x_axis, dataset, color=color, marker='.', linestyle=' ',
                alpha=0.3, label='{} datapoints'.format(label))

    # Plot mean
    for color, label, dataset_mean in zip(colors, labels, y_axis_means):
        ax.plot(x_axis[:group_count], dataset_mean, color=color,
                linestyle='-', label='{} mean'.format(label))

    plt.ylabel("Recognition rate")
    plt.xlabel("Number of training")
    ax.legend(loc='lower right')
    ax.axis([x_min - 1, x_max + 1, 0, 1])
    plt.grid(True)

    # Add a table below the axes
    plt.table(
        cellText=numpy.around(y_axis_means, decimals=2),
        rowLabels=labels,
        colLabels=range(x_min, x_max+1),
        loc='bottom',
        bbox=[0.20, -0.6, 0.75, 0.3]
    )
    plt.subplots_adjust(bottom=0.4)
    plt.show()
def plot_metric_single_value(stats_desc, outdir, num_phases):
    """Plot chart and save it as PNG file.

    Args:
        stats_desc: which table to render: "average", "90th" or
            "absolute_time".
        outdir: directory that receives ``zz-<stats_desc>_value.png``.
        num_phases: number of phases, i.e. table rows; phases missing from
            a column are rendered as ``None``.

    Raises:
        ValueError: if ``stats_desc`` is not one of the known names.
    """
    if stats_desc == "average":
        matrix = AVG_TABLE
    elif stats_desc == "90th":
        matrix = Nth_TABLE
    elif stats_desc == "absolute_time":
        matrix = TIME_TABLE
    else:
        # Fail with a clear message instead of an opaque len(None) error.
        raise ValueError("unknown stats_desc: {0!r}".format(stats_desc))

    if len(matrix) > 0:
        fig = figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.set_title('{0}_Value'.format(stats_desc))

        # One list per matrix key, holding that key's value for each phase.
        col_labels = []
        table_vals = []
        for k in matrix:
            phases = matrix[k]
            table_vals.append([phases[i] if i in phases else None
                               for i in range(num_phases)])
            col_labels.append(k)

        # Transpose so that rows are phases and columns are matrix keys.
        invert_table = [list(phase_row) for phase_row in zip(*table_vals)]
        row_labels = ["P %d" % (i) for i in range(num_phases)]

        table(cellText=invert_table,
              colWidths=[0.2]*len(col_labels),
              rowLabels=row_labels,
              colLabels=col_labels,
              loc='center')
        fig.savefig('{0}/zz-{1}_value.png'.format(outdir, stats_desc), dpi=300)
def test3():
    """Show a 10x10 table whose row i holds 10 evenly spaced values in [0, i]."""
    size = 10
    row_names = ["%s row" % i for i in range(size)]
    col_names = ["%s col" % i for i in range(size)]
    grid = [np.linspace(0, upper, 10) for upper in range(size)]
    the_table = plt.table(cellText=grid,
                          rowLabels=row_names,
                          colLabels=col_names)
    plt.show()
def summary(data):
    """Show a trade-summary table and print the same figures to the console.

    Args:
        data: DataFrame-like object with an ``exit_profit`` column and a
            ``period`` column, one row per trade.

    Monetary figures are the summed ``exit_profit`` multiplied by 300.
    """
    data_win = data[data.exit_profit > 0]
    data_lose = data[data.exit_profit < 0]
    total_num = len(data)
    av_period = data['period'].mean()

    # Every figure is computed once and reused for both the table and the
    # console output, so the two can no longer drift apart.
    win_sum = data_win.exit_profit.sum()
    lose_sum = data_lose.exit_profit.sum()
    overall_profit = str(win_sum * 300)
    overall_loss = str(lose_sum * 300)
    net_profit = str((data.exit_profit.sum()) * 300)
    n_trades = str(total_num)
    n_win = str(len(data_win))
    n_lose = str(len(data_lose))
    average_profit = str(win_sum / total_num * 300)
    win_loss_ratio = str(abs(win_sum / len(data_win) / (lose_sum / len(data_lose))))
    win_pct = str(len(data_win) / float(total_num) * 100) + "%"
    holding_period = str(av_period)

    plt.figure()
    entries = [
        ("Overall Profits: ", overall_profit),
        ("Overall Loss: ", overall_loss),
        ("Net Profits: ", net_profit),
        ("Number of Transaction: ", n_trades),
        ("Number of Winning Trades: ", n_win),
        ("Number of Losing Trades: ", n_lose),
        ("Average Profit:", average_profit),
        ("AV Profits / AV Loss: ", win_loss_ratio),
        ("Winning Percentage: ", win_pct),
        ("Stock Holding Period: ", holding_period),
    ]
    rows = [label for label, _ in entries]
    cell_text = [[value] for _, value in entries]
    columns = (['Summary'])

    the_table = plt.table(cellText=cell_text,
                          colWidths=[0.4],
                          rowLabels=rows,
                          colLabels=columns,
                          loc='center right', fontsize=14)
    plt.text(12, 3.4, 'Table Title', size=8)

    six.print_("******************************************")
    six.print_("总盈利: " + overall_profit)
    six.print_("总亏损: " + overall_loss)
    six.print_("总利润: " + net_profit)
    six.print_("******************************************")
    six.print_("交易次数: " + n_trades)
    six.print_("盈利次数: " + n_win)
    six.print_("亏损次数: " + n_lose)
    six.print_("平均利润: " + average_profit)
    six.print_("盈亏比: " + win_loss_ratio)
    six.print_("胜率: " + win_pct)
    six.print_("平均持仓周期: " + holding_period)
    six.print_("******************************************")
def test2():
    """Show a stacked bar chart of losses by disaster with a table below it."""
    data = [[66386, 174296, 75131, 577908, 32015],
            [58230, 381139, 78045, 99308, 160454],
            [89135, 80552, 152558, 497981, 603535],
            [78415, 81858, 150656, 193263, 69638],
            [139361, 331509, 343164, 781380, 52269]]

    columns = ('Freeze', 'Wind', 'Flood', 'Quake', 'Hail')
    rows = ['%d year' % x for x in (100, 50, 20, 10, 5)]

    values = np.arange(0, 2500, 500)
    value_increment = 1000

    # Pastel shades: one colour per row label, each a 4-component vector
    # (presumably RGBA).
    colors = plt.cm.BuPu(np.linspace(0, 0.5, len(rows)))

    index = np.arange(len(columns)) + 0.3
    bar_width = 0.4

    # Running vertical offset of the stacked bars.
    y_offset = np.zeros(len(columns))

    # Draw one bar layer per data row and collect the table text.
    cell_text = []
    for layer, shade in zip(data, colors):
        plt.bar(index, layer, bar_width, bottom=y_offset, color=shade)
        y_offset = y_offset + layer
        cell_text.append(['%1.1f' % (x / 1000.0) for x in y_offset])

    # Reverse colours and text so the last layer is listed at the top.
    colors = colors[::-1]
    cell_text.reverse()

    # Table below the axes: cell_text is the 2-D body, rowLabels the row
    # names, colLabels the column names and rowColours the row colours.
    the_table = plt.table(cellText=cell_text,
                          rowLabels=rows,
                          rowColours=colors,
                          colLabels=columns,
                          loc='bottom')

    # Adjust layout to make room for the table.
    plt.subplots_adjust(left=0.2, bottom=0.2)

    plt.ylabel("Loss in ${0}'s".format(value_increment))
    # Tick positions are values * value_increment while the labels show the
    # unscaled values (e.g. the label 500 sits at 500000).
    plt.yticks(values * value_increment, ['%d' % val for val in values])
    # No x ticks: the table's column labels take their place.
    plt.xticks([])
    plt.title('Loss by Disaster')

    plt.show()
def plot_matrix(matrix, columns=None, rows=None, title=None):
    """Print ``title`` and show ``matrix`` as a centred table on blank axes.

    Args:
        matrix: 2-D cell contents.
        columns: optional column labels.
        rows: optional row labels.
        title: value printed to stdout before plotting.
    """
    print(title)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_visible(False)
    ax.axis('off')

    the_table = ax.table(cellText=matrix,
                         colLabels=columns,
                         rowLabels=rows,
                         loc="center")
    plt.show()
def plot_mc(self):
    """Plot the confusion matrix and the associated statistical measures.

    Draws ``self.confusion_matrix`` as an annotated image, adds a small
    table with the kappa coefficient and the global reliability, and saves
    the figure as ``confusion_matrix.png``.

    Returns:
        The confusion matrix that was plotted.
    """
    conf_arr = self.confusion_matrix
    fig = plt.figure()
    plt.clf()
    ax = fig.add_subplot(221)
    ax.set_aspect(1)
    res = ax.imshow(array(conf_arr), cmap=plt.cm.jet, interpolation='nearest')

    width = len(conf_arr)
    height = len(conf_arr[0])
    # Write every count into its cell (``range`` works on Python 2 and 3).
    for x in range(width):
        for y in range(height):
            ax.annotate(str(conf_arr[x][y]), xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center')
    fig.colorbar(res)
    plt.title('Matriz de Confusion')
    plt.xlabel('Referencia')
    plt.ylabel('Clasificacion')
    alphabet = '0123-456789'
    alphabeto = '0123N456789'
    plt.xticks(range(width), alphabet[:width])
    plt.yticks(range(height), alphabeto[:height])

    # Table with the two summary statistics.
    filas = self.ncat*2 + 2
    colors = [[(1., 0., 0.)], [(1., 0., 0.)]]
    lightgrn = (0.5, 0.8, 0.5)
    etiquetas_fil1 = (u'Coeficiente kappa', u'Fiabilidad global')
    etiquetas_fil = etiquetas_fil1[:filas]
    ax = fig.add_subplot(155, frameon=False, xticks=[], yticks=[])
    valores = [['%.4f' % (float(self.kappa))], ['%.4f' % (self.reliability)]]
    # One row colour per table row.
    plt.table(cellText=valores,
              rowLabels=etiquetas_fil,
              loc='upper center',
              cellColours=colors,
              rowColours=[lightgrn]*len(valores))
    plt.savefig('confusion_matrix.png', format='png')
    return conf_arr
def plottalo_bello(self):
    """Show a monthly stacked bar chart of people at risk with a table below.

    Reads the module-level ``persone_pesi`` mapping (one entry per return
    period, each holding monthly values) and uses ``self.admin`` in the
    title.
    """
    columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    rows = ['%d RP' % x for x in (1000, 500, 200, 100, 50, 25)]

    # ``.values()`` and ``print(...)`` work on both Python 2 and Python 3.
    people_affected_rp = [[round(elem, 2) for elem in cada.values()]
                          for cada in persone_pesi.values()]
    print(people_affected_rp)

    matrice = np.asarray(people_affected_rp)
    # Upper y limit: the tallest stacked column, rounded up to a multiple of 500.
    maximo_y = math.ceil(max(matrice.sum(0))/500)*500

    values = np.arange(0, maximo_y, 100000)
    value_increment = 1

    # Get some pastel shades for the colors
    colors = plt.cm.OrRd(np.linspace(0, 0.5, len(rows)))
    n_rows = len(persone_pesi)

    index = np.arange(len(columns))
    bar_width = 1

    # Initialize the vertical-offset for the stacked bar chart.
    y_offset = np.array([0.0] * len(columns))

    # Plot bars and create text labels for the table
    cell_text = []
    for row in range(n_rows):
        plt.bar(index, people_affected_rp[row], bar_width, bottom=y_offset,
                color=colors[row])
        y_offset = y_offset + people_affected_rp[row]
        cell_text.append(['%d' % (x) for x in y_offset])

    # Reverse colors and text labels to display the last value at the top.
    colors = colors[::-1]
    cell_text.reverse()

    # Add a table at the bottom of the axes
    the_table = plt.table(cellText=cell_text,
                          rowLabels=rows,
                          rowColours=colors,
                          colLabels=columns,
                          loc='bottom')

    # Adjust layout to make room for the table:
    plt.subplots_adjust(left=0.2, bottom=0.2)

    plt.ylabel("People at risk per Return Period")
    # The label list must be the second argument of yticks(); it used to sit
    # outside the call and was silently discarded.
    plt.yticks(values * value_increment, ['%d' % val for val in values])
    plt.xticks([])
    plt.title('People at risk by Return Period in ' + self.admin)
    plt.show()
def create_table(model, model_name, train_num, X, y):
    """Train ``model`` on several training-set sizes and tabulate the results.

    For every size in ``train_num`` the data is split, the classifier is
    trained, and prediction time and F1 score are collected for both the
    training and the testing set. The table is saved as
    ``table_<model_name>.png`` and shown.

    Args:
        model: classifier passed to ``train_classifier``/``predict_labels``.
        model_name: used for the plot title and the output file name.
        train_num: iterable of training-set sizes (one table column each).
        X, y: full feature matrix and labels, passed to ``split_data``.
    """
    columns = ['Training set size: %d' % x for x in train_num]
    rows = ["Training time of classifier    ",
            "Prediction time for training set",
            "F1 score for training set      ",
            "Prediction time for testing set ",
            "F1 score for testing set       "]

    all_data = []
    for num in train_num:
        # Split data
        X_train, y_train, X_test, y_test = split_data(X, y, num)

        train_time = train_classifier(model, X_train, y_train)
        # Predict once per data set so that the reported time and the F1
        # score come from the same run (index 0 is the time, index 1 the
        # score).
        train_result = predict_labels(model, X_train, y_train)
        test_result = predict_labels(model, X_test, y_test)

        measurements = (train_time,
                        train_result[0], train_result[1],
                        test_result[0], test_result[1])
        all_data.append(["{0:.7f}".format(round(value, 7))
                         for value in measurements])

    # all_data holds one list per training size; the table needs one list
    # per metric, so transpose it. all_data[0] keeps failing fast on an
    # empty train_num, as before.
    num_rows = len(all_data[0])
    all_ordered_data = [[per_size[r] for per_size in all_data]
                        for r in range(num_rows)]

    # Get some pastel shades for the colors, darkest row first.
    colors = plt.cm.BuPu(np.linspace(0, 0.5, len(rows)))
    colors = colors[::-1]

    # Centered table: one row per metric, one column per training size.
    the_table = plt.table(cellText=all_ordered_data,
                          rowLabels=rows,
                          rowColours=colors,
                          colLabels=columns,
                          loc='center')

    plt.title('{}'.format(model_name))
    plt.axis('off')
    plt.savefig("table_{}.png".format(model_name))
    plt.show()
def plotTable(df, title, header=None):
    """Create a figure showing ``df`` as a centred table and register it.

    The new figure and its title are appended to the module-level ``figs``
    and ``fignames`` lists.

    Args:
        df: object whose ``.values`` supplies the table cells.
        title: figure title, also recorded in ``fignames``.
        header: optional column labels. ``None`` or an empty sequence means
            no header row; array-like headers (e.g. ``df.columns``) are
            accepted.
    """
    fig = plt.figure()
    fig.suptitle(title, fontsize="x-large")
    figs.append(fig)
    fignames.append(title)

    table_kwargs = dict(cellText=df.values, loc="center", cellLoc='center')
    # Test the length explicitly: plain truthiness raises for NumPy arrays
    # and pandas Index objects.
    if header is not None and len(header) > 0:
        table_kwargs['colLabels'] = header
    plt.table(**table_kwargs)
    plt.axis("off")
def visualize(avg):
    """Show ``avg`` as a table of Bayes and Tree scores.

    Args:
        avg: sequence of rows, each holding the two scores. Rows are
            labelled 1..len(avg).
    """
    columns = ['Bayes scores', 'Tree scores']
    # Number the rows from the data; this was fixed at five rows, which
    # made matplotlib reject any other number of rows.
    rows = list(range(1, len(avg) + 1))

    ax = mplot.subplot(111, frame_on=False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    table = mplot.table(cellText=avg,
                        colLabels=columns,
                        rowLabels=rows,
                        loc='center')
    mplot.subplots_adjust(left=0.2)
    mplot.show()
def bar_sd2(gov_percentage_list, std_list):
    """Show stacked percentage bars split into four bands, with a table.

    Args:
        gov_percentage_list: one percentage per round.
        std_list: one standard deviation per round.

    The four stacked layers are, bottom-up: percentage - sd, sd, sd and
    100 - sd - percentage.
    """
    pct = np.array(gov_percentage_list)
    sd = np.array(std_list)
    full = np.array([100] * len(std_list))

    layers = [
        (pct - sd).tolist(),
        (sd).tolist(),
        (sd).tolist(),
        (full - sd - pct).tolist(),
    ]

    columns = ["R" + str(i + 1) for i in range(len(gov_percentage_list))]
    rows = ["%s" % x for x in ("opp", "opp-sd", "gov-sd", "gov")]

    values = np.arange(0, 110, 10)
    value_increment = 1

    # Pastel shades, one per row label.
    colors = plt.cm.BuPu(np.linspace(0, 0.5, len(rows)))

    index = np.arange(len(columns)) + 0.3
    bar_width = 0.4

    # Running vertical offset of the stacked bars.
    y_offset = np.array([0.0] * len(columns))

    # Draw the layers and collect the running totals for the table.
    cell_text = []
    for layer, shade in zip(layers, colors):
        plt.bar(index, layer, bar_width, bottom=y_offset, color=shade)
        y_offset = y_offset + layer
        cell_text.append(["%1.1f" % (x) for x in y_offset])

    # Reverse colours and text so the last layer is listed at the top.
    colors = colors[::-1]
    cell_text.reverse()

    # Table below the axes.
    the_table = plt.table(cellText=cell_text,
                          rowLabels=rows,
                          rowColours=colors,
                          colLabels=columns,
                          loc="bottom")

    # Make room for the table.
    plt.subplots_adjust(left=0.2, bottom=0.2)

    plt.ylabel("percentage")
    plt.yticks(values * value_increment, ["%d" % val for val in values])
    plt.xticks([])
    plt.title("Percentage for gov-win")

    plt.show()
def plot(x, y, o):
    """Save a log-scale scatter of ``y`` against ``x`` with a summary table.

    Args:
        x: values for the x axis (labelled as mean magnitude).
        y: values for the y axis (labelled as magnitude scatter).
        o: output path handed to ``fig.savefig``.

    The table lists ``y[0]`` and the average of the first 5, 10, 25, 50 and
    100 entries of ``y``.
    """
    plt.rcdefaults()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(x, y, '.')
    ax.set_yscale('log')
    ax.set_xlabel(r'$\overline{m}$ (mag)')
    ax.set_ylabel(r'$\sigma_{m}$ (mag)')

    # Pad both axes by 5% of the extreme values.
    margin = 0.05
    ax.set_xlim((min(x)*(1-margin), max(x)*(1+margin)))
    ax.set_ylim((min(y)*(1-margin), max(y)*(1+margin)))
    ax.xaxis.set_minor_locator(MultipleLocator(0.5))

    summary_rows = [['N', r'$\overline{{\sigma}}$'],
                    [1, '{:.3f}'.format(y[0])]]
    for count in (5, 10, 25, 50, 100):
        summary_rows.append([count, '{:.3f}'.format(np.average(y[0:count]))])

    plt.table(cellText=summary_rows,
              colWidths=[0.1, 0.1],
              loc='center left')
    fig.savefig(o, bbox_inches='tight', pad_inches=0.05)
    plt.close(fig)
def create_table(self, data=None):
    """Build the results table below the current axes and return it.

    Args:
        data: not used by this method.

    Returns:
        The matplotlib table that was created.
    """
    # NOTE(review): ``self`` is passed explicitly as an argument here while
    # ``data`` is ignored -- confirm against create_cell_text's signature.
    body = self.create_cell_text(self)
    row_names = self.create_row_labels()
    headers = ['Starting Capital', 'Number of Trades',
               'Ending Capital', 'Annualized Return']

    # create_table_colors returns (cell, row, column) colours, in that order.
    palette = self.create_table_colors(row_names, headers, body)
    cell_colours = palette[0]
    row_colours = palette[1]
    col_colours = palette[2]

    table = plt.table(cellText=body,
                      cellColours=cell_colours,
                      rowColours=row_colours,
                      rowLabels=row_names,
                      colColours=col_colours,
                      colLabels=headers,
                      bbox=[0.0, -1.35, 1.0, 1.0],
                      cellLoc='center')
    table.set_fontsize(60)
    return table
def printLegend(rowLabels, colLabels, params):
    """Show a legend table describing the experiments.

    Args:
        rowLabels: one label per table row.
        colLabels: one label per table column.
        params: 2-D cell contents.
    """
    plt.figure()
    # One width per column; this was fixed at four widths, which fails for
    # tables with more than four columns.
    the_table = plt.table(cellText=params,
                          colWidths=[0.2] * len(colLabels),
                          rowLabels=rowLabels,
                          colLabels=colLabels,
                          loc='center')
    plt.text(12, 3.4, 'Table Title', size=8)
    plt.title('Legend for expiriments')
    plt.show()
# Row labels of the fit-parameter table.
param_rows.append("SiO$_2$/Gr: SiO$_2$ Fit")
# param_rows.append("SiO$_2$/Gr/Ga$_2$O$_3$: SiO$_2$ Fit")
param_rows.append("SiO$_2$/Gr/Ga$_2$O$_3$: Ga$_2$O$_3$ Fit")
param_rows.append("SiO$_2$/Gr/Ga$_2$O$_3$: Generic single mode fit")

# One tuple per row: "value +/- sigma" for each fitted parameter, where sigma
# is the square root of the matching diagonal covariance entry. Rows with a
# single fitted parameter get "-" in the second column. Raw strings keep the
# LaTeX "\pm" from being parsed as an (invalid) escape sequence.
param_list = []
# param_list.append(tuple([r"%0.03f $\pm$ %0.03f" % (params_exp[i], math.sqrt(covar_exp[i,i])) for i in range(1)]))
param_list.append(tuple(
    [r"%0.03f $\pm$ %0.03f" % (obj1_params_exp_sio2[i], math.sqrt(obj1_covar_exp_sio2[i, i]))
     for i in range(1)] + ["-"]))
# param_list.append(tuple([r"%0.03f $\pm$ %0.03f" % (obj2_params_exp_sio2[i], math.sqrt(obj2_covar_exp_sio2[i,i])) for i in range(1)] + ["-"]))
param_list.append(tuple(
    [r"%0.03f $\pm$ %0.03f" % (obj2_params_exp_ga2o3[i], math.sqrt(obj2_covar_exp_ga2o3[i, i]))
     for i in range(1)] + ["-"]))
param_list.append(tuple(
    [r"%0.03f $\pm$ %0.03f" % (obj2_params_exp_generic[i], math.sqrt(obj2_covar_exp_generic[i, i]))
     for i in range(2)]))

exp_ax.set_title(r"Devs4_03 Run04; Bare graphene & Ga$_2$O$_3$ covered graphene"+"\n"+r"After Ga$_2$O$_3$ deposition")
plt.table(cellText=param_list,
          rowLabels=param_rows,
          colLabels=param_headers,
          # bbox=(1.175,0.6,1.0,0.4))
          bbox=(1.525, 0.6, 0.6, 0.4))
          # bbox=(1.175,0.6,0.7,0.4))

# Output name encodes the temperature and gate-voltage ranges.
saveLoc = saveTarget + "Phonons %0.02f" % temps1[0] + "K-%0.02f" % temps1[-1] + "K %0.01fV" % vgs1[0] + "-%0.01fV" % vgs1[-1]
integer = 0
if os.path.exists(saveLoc + ".png"):
    # Pick the first numbered file name that is not taken yet.
    saveTarget = saveLoc + str(integer) + ".png"
    while os.path.exists(saveTarget):
        integer += 1
        saveTarget = saveLoc + str(integer) + ".png"
else:
    saveTarget = saveLoc + str(integer) + ".png"
plt.savefig(saveTarget, bbox_inches="tight")
def _get_fit(self, per_loc, per_admit, per_cc, LOS_cc, LOS_nc, per_vent, ppe_GLOVE_SURGICAL, ppe_GLOVE_EXAM_NITRILE, ppe_GLOVE_GLOVE_EXAM_VINYL, ppe_MASK_FACE_PROCEDURE_ANTI_FOG, ppe_MASK_PROCEDURE_FLUID_RESISTANT, ppe_GOWN_ISOLATION_XLARGE_YELLOW, ppe_MASK_SURGICAL_ANTI_FOG_W_FILM, ppe_SHIELD_FACE_FULL_ANTI_FOG, ppe_RESPIRATOR_PARTICULATE_FILTER_REG, TimeLag, PopSize, ForecastDays, forecasted_y, focal_loc, fdates, new_cases, model, Forecasted_cases_df_for_download, Forecasted_patient_census_df_for_download, Forecasted_ppe_needs_df_for_download):
    """Plot the forecasted patient census and PPE needs and tabulate the latter.

    The new cases are spread over later days with a Poisson time lag
    (mean ``TimeLag``), converted to critical / non-critical care admits
    using the ``per_*`` percentages, and carried over from day to day
    with binomial discharge probabilities derived from ``LOS_cc`` and
    ``LOS_nc``.  Daily PPE demand is the per-patient rate (``ppe_*``)
    times the in-house census plus the new visits.

    Only the last ``ForecastDays + 1`` days are plotted and tabulated.

    ``per_vent``, ``PopSize``, ``focal_loc`` and ``model`` are accepted but
    not read here.  The incoming ``Forecasted_ppe_needs_df_for_download``
    is replaced by a freshly built frame.

    Returns:
        ``(Forecasted_cases_df_for_download,
        Forecasted_patient_census_df_for_download,
        Forecasted_ppe_needs_df_for_download)``; the first two are passed
        through unchanged, the third has a ``date`` column followed by
        one column per PPE item.
    """
    # declare figure object; all drawing below goes through pyplot
    plt.figure(figsize=(15, 17))

    # the last ForecastDays + 1 entries of any per-day sequence
    window = slice(-(ForecastDays + 1), None)

    #### Inclusion of time lag
    # time lag is modeled as a Poisson distributed
    # random variable with a mean chosen by the user (TimeLag)
    new_cases_lag = []
    x = list(range(len(forecasted_y)))
    for i in new_cases:
        lag_pop = i*poisson.pmf(x, TimeLag)
        new_cases_lag.append(lag_pop)

    # Declare a list to hold time-staggered lists
    # This will allow the time-lag effects to
    # be summed across rows (days)
    lol = []
    for i, daily_vals in enumerate(new_cases_lag):
        # number of indices to pad in front
        fi = [0]*i
        diff = len(new_cases) - len(fi)
        # number of indices to pad in back
        bi = [0]*diff
        ls = list(fi) + list(daily_vals) + list(bi)
        lol.append(np.array(ls))

    # convert the list of time-staggered lists to an array
    ar = np.array(lol)

    # get the time-lagged sum of visits across days
    ts_lag = np.sum(ar, axis=0)
    # upper truncate for the number of days in observed y values
    ts_lag = ts_lag[:len(new_cases)]

    p = 0.1
    n_cc = LOS_cc*10
    n_nc = LOS_nc*10

    # get the binomial random variable properties
    rv_nc = binom(n_nc, p)
    # Use the binomial cumulative distribution function
    p_nc = rv_nc.cdf(np.array(range(1, len(fdates)+1)))

    # get the binomial random variable properties
    rv_cc = binom(n_cc, p)
    # Use the binomial cumulative distribution function
    p_cc = rv_cc.cdf(np.array(range(1, len(fdates)+1)))

    # Initiate lists to hold numbers of critical care and non-critical care patients
    # who are expected as new admits (index 0), as 1 day patients, 2 day patients, etc.
    LOScc = np.zeros(len(fdates))
    LOScc[0] = ts_lag[0] * (0.01 * per_cc) * (0.01 * per_admit) * (0.01 * per_loc)
    LOSnc = np.zeros(len(fdates))
    LOSnc[0] = ts_lag[0] * (1-(0.01 * per_cc)) * (0.01 * per_admit) * (0.01 * per_loc)

    total_nc = []
    total_cc = []

    # Roll up patient carry-over into lists of total critical care and total
    # non-critical patients expected
    for i, day in enumerate(fdates):
        LOScc = LOScc * (1 - p_cc)
        LOSnc = LOSnc * (1 - p_nc)

        LOScc = np.roll(LOScc, shift=1)
        LOSnc = np.roll(LOSnc, shift=1)

        LOScc[0] = ts_lag[i] * (0.01 * per_cc) * (0.01 * per_admit) * (0.01 * per_loc)
        LOSnc[0] = ts_lag[i] * (1 - (0.01 * per_cc)) * (0.01 * per_admit) * (0.01 * per_loc)

        total_nc.append(np.sum(LOSnc))
        total_cc.append(np.sum(LOScc))

    # Plot the critical care and non-critical care patient census over the
    # forecasted time frame
    plt.plot(fdates[window], total_cc[window], c='m', label='Critical care', linewidth=3)
    plt.plot(fdates[window], total_nc[window], c='0.4', label='Non-critical care', linewidth=3)

    ####################### PPE ##################################
    ax = plt.subplot2grid((6, 4), (4, 0), colspan=2, rowspan=2)

    # (per-patient rate, label, plot colour) for every PPE item
    ppe_specs = [
        (ppe_GLOVE_SURGICAL, 'GLOVE SURGICAL', 'r'),
        (ppe_GLOVE_EXAM_NITRILE, 'GLOVE EXAM NITRILE', 'orange'),
        (ppe_GLOVE_GLOVE_EXAM_VINYL, 'GLOVE EXAM VINYL', 'goldenrod'),
        (ppe_MASK_FACE_PROCEDURE_ANTI_FOG, 'MASK FACE PROCEDURE ANTI FOG', 'limegreen'),
        (ppe_MASK_PROCEDURE_FLUID_RESISTANT, 'MASK PROCEDURE FLUID RESISTANT', 'green'),
        (ppe_GOWN_ISOLATION_XLARGE_YELLOW, 'GOWN ISOLATION XLARGE YELLOW', 'cornflowerblue'),
        (ppe_MASK_SURGICAL_ANTI_FOG_W_FILM, 'MASK SURGICAL ANTI FOG W/FILM', 'blue'),
        (ppe_SHIELD_FACE_FULL_ANTI_FOG, 'SHIELD FACE FULL ANTI FOG', 'plum'),
        (ppe_RESPIRATOR_PARTICULATE_FILTER_REG, 'RESPIRATOR PARTICULATE FILTER REG', 'darkviolet'),
    ]

    def ppe_demand(census):
        """Return ``[daily counts, label, colour]`` per PPE item for *census*."""
        return [[np.round(rate * census).astype('int'), name, colour]
                for rate, name, colour in ppe_specs]

    #### Construct arrays for critical care and non-critical care patients
    # All covid patients expected in house on each forecasted day. PUI is just a name here
    PUI_COVID = np.array(total_nc) + np.array(total_cc)
    # Preparing to add new visits, fraction of new cases visiting your hospital = 0.01 * per_loc
    new_visits_your_hospital = ts_lag * (0.01 * per_loc)
    # Add number of new visits to number of in house patients
    PUI_COVID = PUI_COVID + new_visits_your_hospital

    ppe_ls = ppe_demand(PUI_COVID)

    linestyles = ['dashed', 'dotted', 'dashdot',
                  'dashed', 'dotted', 'dashdot',
                  'dotted', 'dashed', 'dashdot']

    for i, ppe in enumerate(ppe_ls):
        plt.plot(fdates[window], ppe[0][window],
                 c=ppe[2], label=ppe[1], linewidth=2, ls=linestyles[i])

    plt.title('Forecasted PPE needs', fontsize = 16, fontweight = 'bold')

    # keep only every 12th x tick label visible
    ax = plt.gca()
    temp = ax.xaxis.get_ticklabels()
    temp = list(set(temp) - set(temp[::12]))
    for label in temp:
        label.set_visible(False)

    # text-only legend: each entry is written in its line's colour and the
    # line handles themselves are hidden
    leg = ax.legend(handlelength=0, handletextpad=0, fancybox=True,
                    loc='best', frameon=True, fontsize=8)
    for line, text in zip(leg.get_lines(), leg.get_texts()):
        text.set_color(line.get_color())

    # Matplotlib 3.7 renamed ``legendHandles`` to ``legend_handles`` and
    # later removed the old name; support both.
    handles = getattr(leg, 'legend_handles', None)
    if handles is None:
        handles = leg.legendHandles
    for item in handles:
        item.set_visible(False)

    plt.ylabel('PPE Supplies', fontsize=14, fontweight='bold')
    plt.xlabel('Date', fontsize=14, fontweight='bold')

    ax = plt.subplot2grid((6, 4), (4, 2), colspan=2, rowspan=2)
    ax.axis('off')

    #### Same PPE demand, restricted to the forecast window, for the table
    PUI_COVID = PUI_COVID[window]
    ppe_ls = ppe_demand(PUI_COVID)

    col_labels = [ppe[1] for ppe in ppe_ls]
    row_labels = fdates.tolist()
    row_labels = row_labels[window]

    table_vals = []
    cclr_vals = []
    rclr_vals = []

    Forecasted_ppe_needs_df_for_download = pd.DataFrame(columns = ['date'] + col_labels)
    for i in range(len(row_labels)):
        cell = [ppe[0][i] for ppe in ppe_ls]

        df_row = [row_labels[i]]
        df_row.extend(cell)

        labs = ['date'] + col_labels
        temp = pd.DataFrame([df_row], columns=labs)
        Forecasted_ppe_needs_df_for_download = pd.concat([Forecasted_ppe_needs_df_for_download, temp])

        # first row is shaded grey, all others are white
        if i == 0:
            rclr = '0.8'
            cclr = ['0.8'] * len(ppe_ls)
        else:
            rclr = 'w'
            cclr = ['w'] * len(ppe_ls)

        table_vals.append(cell)
        cclr_vals.append(cclr)
        rclr_vals.append(rclr)

    cwp = 0.15
    # at most this many days are shown in the table
    lim = 15

    the_table = plt.table(cellText=table_vals[0:lim],
                          colWidths=[cwp] * len(ppe_ls),
                          rowLabels=row_labels[0:lim],
                          colLabels=None,
                          cellLoc='center',
                          loc='upper center',
                          cellColours=cclr_vals[0:lim],
                          rowColours =rclr_vals[0:lim])

    the_table.auto_set_font_size(True)
    the_table.scale(1, 1.32)

    # Colour each column's text like its line in the plot.  Only rows that
    # exist are touched: iterating a fixed ``lim`` rows raised KeyError
    # whenever fewer than ``lim`` days were forecast.
    n_shown = min(lim, len(table_vals))
    for i in range(len(ppe_ls)):
        clr = ppe_ls[i][2]
        for j in range(n_shown):
            the_table[(j, i)].get_text().set_color(clr)

    # set values for diagonal column labels
    hoffset = -0.3 #find this number from trial and error
    voffset = 1.0 #find this number from trial and error
    col_width = [0.06, 0.09, 0.09, 0.12, 0.133, 0.138, 0.128, 0.135, 0.142]

    col_labels2 =[['GLOVE SURGICAL', 'r'],
                  ['GLOVE EXAM NITRILE', 'orange'],
                  ['GLOVE GLOVE EXAM VINYL', 'goldenrod'],
                  ['MASK FACE PROC. A-FOG', 'limegreen'],
                  ['MASK PROC. FLUID RES.', 'green'],
                  ['GOWN ISO. XL YELLOW', 'cornflowerblue'],
                  ['MASK SURG. ANTI FOG W/FILM', 'blue'],
                  ['SHIELD FACE FULL ANTI FOG', 'plum'],
                  ['RESP. PART. FILTER REG', 'darkviolet']]

    # rotated column headers drawn as annotations above the table
    for i, val in enumerate(col_labels2):
        ax.annotate(' '+val[0], xy=(hoffset + i * col_width[i], voffset),
                    xycoords='axes fraction', ha='left', va='bottom',
                    rotation=-25, size=8, c=val[1])

    plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=1.1, hspace=1.1)

    return Forecasted_cases_df_for_download, Forecasted_patient_census_df_for_download, Forecasted_ppe_needs_df_for_download
# Pyplot style
import matplotlib.pyplot as plt

# One [name, score] pair per table row.
data_uas = [
    ['Bejo', 70],
    ['Tejo', 83],
    ['Cecep', 62],
    ['Wati', 74],
    ['Karti', 71],
]

# Draw the table in the middle of the current axes, stretch the rows to
# four times their default height and set the font size.
table = plt.table(loc='center', cellText=data_uas)
table.scale(1, 4)
table.set_fontsize(14)

# Hide the axes so that only the table is visible.
ax = plt.gca()
ax.axis(False)

plt.show()
def chart(consensus, hydro, chain, stru, cd_hit, filename, col):
    """Draw a multi-row MSA overview figure and save it to *filename*.

    The alignment is split into rows of at most ``col`` positions.  Each
    row gets a bar chart of ``hydro`` (bars coloured by ``chain``, scaled
    between the side-chain volume limits 60 and 230), a one-line table
    with ``consensus``, one with ``stru`` and a table with one line per
    entry of ``cd_hit`` (sorted by key).  Row labels are only drawn for
    the first row.

    Args:
        consensus: sequence whose length defines the number of positions.
        hydro: per-position bar heights.
        chain: per-position values mapped to the bar colour.
        stru: per-position structure symbols.
        cd_hit: mapping of name -> per-position symbols.
        filename: path passed to ``plt.savefig``.
        col: maximum number of positions per row (clamped to the length
            of ``consensus``).

    Returns:
        The created figure.
    """
    minC = 60.0  # min side-chain volume
    maxC = 230.0  # max side-chain volume
    n_pos = len(consensus)
    if col > n_pos:
        col = n_pos
    k = len(cd_hit)
    rows = int(np.ceil(float(n_pos) / col))  # number of rows

    fig = plt.figure(figsize=(col / 4.0 + 4, rows * (9 + k / 2.0) + 4))

    # Figure-relative size of one "inch" in each direction.
    inchH = 1.0 / (rows * (9 + k / 2.0) + 4)
    colBarH = 3.0 * inchH  # colorbar height
    margin = 2.0 * inchH
    height = 6.0 * inchH  # barchart height
    seqH = (k / 2.0 + 1.0) * inchH  # sequence table height
    inchW = 1.0 / (col / 4.0 + 4)

    wykr = plt.axes([2 * inchW, 1 - colBarH, 1 - 4 * inchW, colBarH])
    wykr.set_title("MSA Visualization", y=0.4)
    plt.axis('off')

    # side chain volume colorbar
    m = cm.ScalarMappable(cmap=cm.autumn)
    m.set_array(np.array([minC, maxC]))
    # ``m`` is not attached to any axes, so name the axes to steal space
    # from explicitly (older Matplotlib fell back to the current axes,
    # which is ``wykr`` here; newer versions raise instead).
    cbr = plt.colorbar(m, ax=wykr, orientation='horizontal', fraction=0.4)
    cbr.set_label('Side Chain Volume')

    width = 1
    widths = [1.0 / col] * col
    seq_keys = sorted(cd_hit.keys())

    for r in range(rows):
        is_last = (r == rows - 1)
        lo = col * r
        bottom = 1 - (r + 1) * (margin + height + seqH) - colBarH + seqH

        # barchart axes; the last row may be shorter than the others
        if is_last:
            hi = n_pos
            tmp = plt.axes([
                2 * inchW, bottom,
                (1 - 4 * inchW) * (n_pos - col * (rows - 1)) / float(col),
                height
            ], xlabel="Amino Acid", ylabel='Hydrophobicity')
        else:
            hi = lo + col
            tmp = plt.axes([2 * inchW, bottom, 1 - 4 * inchW, height],
                           xlabel="Amino Acid", ylabel='Hydrophobicity')
        plt.axis([lo - 0.5, hi - 0.5, -5, 5])  # min and max of the x and y axes
        plt.xticks(list(range(lo, hi, 5)))

        for pos in range(lo, hi):
            c = (1, (chain[pos] - minC) / (maxC - minC), 0)  # bar color
            tmp.bar(pos, hydro[pos], width, color=c, align='center',
                    linewidth=1)

        # Decide what goes into this row's tables.  The first row wins
        # over "last" when there is only a single row.
        cons_labels = stru_labels = seq_labels = None
        if r == 0:
            cons_cells = consensus[lo:lo + col]
            stru_cells = stru[lo:lo + col]
            cons_labels = ["consensus"]
            stru_labels = ["structure"]
            seq_labels = [(key[:15] if len(key) > 15 else key) + ": "
                          for key in seq_keys]
        elif is_last:
            widths = [1.0 / col] * (n_pos - col * (rows - 1))
            cons_cells = consensus[lo:]
            stru_cells = stru[lo:]
        else:
            cons_cells = consensus[lo:lo + col]
            stru_cells = stru[lo:lo + col]
        seq_cells = [cd_hit[key][lo:lo + col] for key in seq_keys]

        # consensus table
        tabCons = plt.table(cellText=[cons_cells], cellLoc='center',
                            rowLabels=cons_labels, colWidths=widths,
                            bbox=[0, 1.07, 1, 0.04])
        tabCons.auto_set_font_size(False)
        tabCons.set_fontsize(9)

        # structure table
        tabStru = plt.table(cellText=[stru_cells], cellLoc='center',
                            rowLabels=stru_labels, colWidths=widths,
                            bbox=[0, 1.02, 1, 0.04])
        tabStru.auto_set_font_size(False)
        tabStru.set_fontsize(9)

        # sequence table; skipped when there are no sequences because
        # ``plt.table`` cannot be built from an empty ``cellText``
        if seq_cells:
            tabCdHit = plt.table(cellText=seq_cells, cellLoc='center',
                                 colWidths=widths, rowLabels=seq_labels,
                                 bbox=[0, -(k / 2.0 + 1.0) / 6.0, 1, k / 12.0])
            tabCdHit.auto_set_font_size(False)
            tabCdHit.set_fontsize(12)
            # hide the grid of the sequence table
            for v in tabCdHit.get_celld().values():
                v.set_edgecolor('w')

    plt.savefig(filename)
    return fig
def desc_table(df):
    """Render a two-column table describing the land-type categories.

    The table lists each land type (with a generated abbreviation) next to
    a fixed description, is saved to ``../images/desc_table.png`` and then
    shown.  ``df`` is accepted but not read by this function.

    Returns:
        Whatever ``plt.show()`` returns.
    """
    #ltype = df.type[df.type.str.contains('%|Land ')].unique()
    ltype = [
        'Land area (thousand hectares)', 'Arable land (% of total land area)',
        'Permanent crops (% of total land area)',
        'Forest cover (% of total land area)',
        'Important sites for terrestrial biodiversity protected (% of total sites protected)'
    ]

    # Adapt the land-type strings: drop the bracketed unit and append an
    # abbreviation built from the initials, e.g. "Arable land (AL)".
    # The pattern is a raw string so that "\(" etc. are not treated as
    # (invalid) string escapes.
    bracketed = re.compile(r"[\(\[].*?[\)\]]")
    lterms = []
    for n in ltype:
        s = bracketed.sub("", n).strip()
        s = s.replace('Land area', 'Total Land Area')
        o = "".join(word[0].upper().strip() for word in s.split())
        o = o.replace('ISFTBP', 'IB')
        lterms.append(s + ' (' + o + ')')

    # Split the longest term over several lines so it fits the column width
    lterms[4] = 'Important sites for\n terrestrial biodiversity\n protected (IB)'

    # Manual list with definitions of the type of lands
    t_0 = 'Total area excluding area under inland water bodies. The definition of inland water bodies generally includes\n major rivers and lakes. Data is expressed in 1000 hectares (Ha).'
    t_1 = 'Arable land includes land defined by the FAO as land under temporary crops (double-cropped areas are counted once),\n temporary meadows for mowing or for pasture, land under market or kitchen gardens, and land temporarily fallow.\n Land abandoned as a result of shifting cultivation is excluded.'
    t_2 = 'Crops are divided into temporary and permanent crops. Permanent crops are sown or planted once, and then occupy\n the land for some years and need not be replanted after each annual harvest, such as cocoa, coffee and rubber.\n This category includes flowering shrubs, types fruit trees, nut trees and vines, but excludes trees grown for wood or timber.'
    t_3 = 'Area covered with forest.'
    t_4 = 'Terrestrial protected areas.'

    rows = [[lterms[0], t_0], [lterms[1], t_1], [lterms[2], t_2],
            [lterms[3], t_3], [lterms[4], t_4]]
    columns = ['Types', 'Description']
    color = [["gainsboro", "gainsboro"], ["lightsalmon", "lightsalmon"],
             ["indianred", "indianred"], ["lightblue", "lightblue"],
             ["cornflowerblue", "cornflowerblue"]]

    fig = plt.figure(figsize=(15, 5))
    fig.add_subplot(111, frameon=False, xticks=[], yticks=[])

    tab = plt.table(
        colLabels=columns,
        cellText=rows,
        loc='center',
        cellColours=color,
        colWidths=(0.23, 1.17),
        cellLoc='center',
        rowLoc='center',
        bbox=(-0.16, 0, 1.28, 1),
    )

    # Header row (row 0) gets its own font; body rows are 1..len(rows)
    for n in range(len(columns)):
        tab[0, n].set_text_props(fontfamily='Purisa',
                                 fontweight='heavy',
                                 size=16)
    for n in range(1, len(rows) + 1):
        for s in range(len(columns)):
            tab[n, s].set_text_props(size=14)

    # Individual settings
    tab.auto_set_font_size(False)
    tab.set_in_layout(True)
    tab.scale(0.9, 5.5)

    # Cell height settings: shrink the header row and row 4.
    # NOTE(review): the original looped over range(5) with the candidate
    # list [0, 4, 5], so row 5 (the last body row) was never reached.
    # That behaviour is kept here; confirm whether row 5 was intended too.
    cellDict = tab.get_celld()
    for n in (0, 4):
        cellDict[(n, 0)].set_height(0.15)
        cellDict[(n, 1)].set_height(0.15)

    plt.savefig('../images/desc_table.png')
    return plt.show()
def print_prediction_multiple_models(self):
    """Evaluate every saved (nu, gamma) model per property and plot the scores.

    For each property returned by ``return_meas_per_prop()`` the feature
    matrix is assembled from the first value, last value and coefficient
    of every measure (read from the three ``end_dropped_data_*.json``
    files).  All 20 x 20 models stored as
    ``nu_<nu>gama_<gamma>_<property>_model.joblib`` are loaded and the
    fraction of samples predicted as ``1`` is written into a table, which
    is saved as an image named after the property in the ``Best_models``
    folder.  The best fraction and its parameters are printed.

    The method navigates with the ``change_folder`` /
    ``change_inside_folder`` / ``move_initial_folder`` helpers, so it
    depends on, and changes, their notion of the current folder.
    """
    change_folder("Results_Merge_Data")
    measures = return_meas_per_prop()
    with open("end_dropped_data_first_values.json", 'r') as handle:
        first_values = json.load(handle)
    with open("end_dropped_data_last_values.json", 'r') as handle:
        last_values = json.load(handle)
    with open("end_dropped_data_coefs.json", 'r') as handle:
        aver_coefs = json.load(handle)

    move_initial_folder()
    change_folder("Training")
    change_inside_folder("models")

    # nu and gamma both run over 0.01 * 1 .. 0.01 * 20
    grid = range(1, 21)

    # ``prop`` instead of ``property`` to avoid shadowing the builtin
    for prop in measures:
        change_inside_folder(prop)
        change_inside_folder("poly")  # It can be changed to linear and rbf

        # One (first, last, coefficient) block of columns per measure.
        blocks = [
            np.array([[
                first_values[prop][measure][j],
                last_values[prop][measure][j],
                aver_coefs[prop][measure][j]
            ] for j in range(len(first_values[prop][measure]))])
            for measure in measures[prop]
        ]
        # Concatenating here (rather than accumulating in a variable that
        # survives the loop) means a property without measures fails
        # loudly instead of silently reusing the previous property's
        # feature matrix.
        features = np.concatenate(blocks, axis=1)

        max_per = 0
        max_a = 0
        max_b = 0
        vals = [[0 for c in range(20)] for t in range(20)]
        label_col = [69 / 255, 139 / 255, 116 / 255]
        colors = [[(245 / 255, 245 / 255, 220 / 255) for c in range(20)]
                  for t in range(20)]
        labelx = [0.01 * step for step in grid]
        labely = list(labelx)

        for a in grid:
            nu_tes = (a * 0.01)
            for b in grid:
                gama = (b * 0.01)
                # The file name uses str() of the float products exactly
                # as the models were saved; do not reformat.
                clf = joblib.load('nu_' + str(nu_tes) + 'gama_' + str(gama) +
                                  '_' + prop + '_model.joblib')
                predictions = clf.predict(features)
                k = round(
                    predictions.tolist().count(1) / len(predictions), 3)
                if k > max_per:
                    max_per = k
                    max_a = a
                    max_b = b
                vals[a - 1][b - 1] = k

        # Highlight the best cell.  When no model predicted a single 1,
        # max_a / max_b are still 0 and indexing with -1 would highlight
        # the bottom-right cell by accident, so skip it.
        if max_a and max_b:
            colors[max_a - 1][max_b - 1] = [0, 0, 238 / 255]

        plt.table(cellText=vals,
                  rowLabels=labelx,
                  colLabels=labely,
                  rowColours=[label_col] * 20,
                  colColours=[label_col] * 20,
                  cellColours=colors,
                  cellLoc='center',
                  loc='upper left')
        plt.axis('off')

        move_initial_folder()
        change_folder("Training")
        change_inside_folder("Best_models")
        plt.savefig(prop)
        plt.clf()

        # go back to the models folder for the next property
        move_initial_folder()
        change_folder("Training")
        change_inside_folder("models")

        print('Max percentage ' + str(max_per) + ' for nu ' +
              str(max_a * 0.01) + ' and for gama ' + str(max_b * 0.01) +
              " property " + prop)
def executor(merged_time_series_to_cluster,
             upstream_TSS=0,
             downstream_TSS=0,
             diff_bind_version=False,
             mode_atr=["FIRST_TS", "SECOND_TS"][1],
             mode_atr2=["ENHANCER", "GENE", "TSS"][1],
             GLOBAL_OR_SURVIVED=["survived", "global"][1],
             mode_of_data_sets=["Ciiras", "Others_from_cistrom_finder"][0],
             sorted_mode=["amplitude_sorted", "size_sorted"][1],
             dont_plot=["ESR2", "RAD21"]):
    """Compute per-cluster enrichment of peak files and plot it as a table.

    Steps, in order:

    1. Load the list of peak files, the "survived" indexes and the
       cluster labels (either from the clustering output under
       ``R_scripts/AP_clustering_output/`` or, with ``diff_bind_version``,
       from ``indexes_of_DB_peaks.csv``).
    2. Write the region file to overlap against (TSS / GENE mode) or use
       the configured enhancer file (ENHANCER mode).
    3. Run ``windowBed`` for every peak file and build a boolean
       region x factor matrix of overlaps.
    4. For every cluster and factor compute ``1 - binom.cdf(x - 1, n, p)``
       where ``x`` is the number of overlapping regions in the cluster,
       ``n`` the cluster size and ``p`` the background frequency (taken
       from the survived regions or from all / distal regions depending
       on ``GLOBAL_OR_SURVIVED``).
    5. Write the probabilities and the list of enriched factors to the
       temp folder and save a coloured table as PDF.

    Each list-valued default in the signature spells out the allowed
    values and indexes the one used by default.  ``dont_plot`` names
    factors excluded from the table and from the enrichment list.

    Intermediate and result files are written below
    ``<cwd>/hg19/<merged_time_series_to_cluster>_results_temp_<GLOBAL_OR_SURVIVED>/``.
    Nothing is returned.

    NOTE(review): this is Python 2 code (``print`` statements, indexing
    the result of ``map``).
    """
    pwd = os.getcwd()
    hg = 'hg19'
    if mode_of_data_sets == "Ciiras":
        name_of_files = np.loadtxt(pwd + "/" + hg +
                                   "/list_of_files_Ciira_names_changed.txt",
                                   dtype=str)
    elif mode_of_data_sets == "Others_from_cistrom_finder":
        name_of_files = np.loadtxt(pwd + "/" + hg + "/list_of_files.txt",
                                   dtype=str)
    path_to_R = pwd + "/R_scripts/AP_clustering_output/"
    survived = np.loadtxt(
        path_to_R +
        '{0}_survived_indexes'.format(merged_time_series_to_cluster)).astype(
            int)  # saved during filtering
    if diff_bind_version:
        # Two "clusters": label 2 for the peaks listed in the DB file,
        # label 1 for all others, restricted to the survived peaks.
        peaks = np.loadtxt(config_variables.name_of_enhancer_file_for_overlap,
                           dtype=str)
        indexes_of_DB_peaks = np.loadtxt(pwd + "/" + hg +
                                         "/indexes_of_DB_peaks.csv",
                                         dtype=int,
                                         skiprows=1,
                                         usecols=(1, ),
                                         delimiter=",")
        labels = np.zeros(len(peaks), int)
        labels[indexes_of_DB_peaks] = 1
        labels = labels + 1
        labels = labels[survived]
    else:
        # Second column of the labels file, skipping its header row.
        labels = np.loadtxt(
            path_to_R + '{0}_labels'.format(merged_time_series_to_cluster),
            str,
            delimiter=",")[1:, 1].astype(int)  # from EP clustering
    save_to_temp_folder = pwd + "/" + hg + "/" + merged_time_series_to_cluster + "_results_temp_{0}/".format(
        GLOBAL_OR_SURVIVED)
    if not os.path.exists(save_to_temp_folder):
        os.makedirs(save_to_temp_folder)

    def create_GENE_file(upstream_TSS):
        """Write the promoter file with regions extended by ``upstream_TSS``.

        Column 1 is lowered for '+' rows and column 2 raised for the other
        rows; a running row index is appended as last column.  The output
        path is the enclosing ``name_of_file_for_overlap``.
        """
        data = np.loadtxt(
            config_variables.name_of_time_series_promoter_file_for_TSS_start,
            dtype=str,
            delimiter='\t')
        plus_strand = data[:, 4] == '+'
        data[plus_strand, 1] = data[plus_strand, 1].astype(int) - upstream_TSS
        data[np.invert(plus_strand),
             2] = data[np.invert(plus_strand), 2].astype(int) + upstream_TSS
        data = np.c_[data, range(len(data))]
        np.savetxt(name_of_file_for_overlap, data, fmt='%s', delimiter='\t')

    def create_TSS_file(upstream_TSS, downstream_TSS):
        """Write the promoter file with every region collapsed to one base.

        For '+' rows column 2 becomes column 1 + 1; for the other rows
        column 1 becomes column 2 - 1.  A running row index is appended as
        last column.  The two arguments are not used inside the function.
        """
        data = np.loadtxt(
            config_variables.name_of_time_series_promoter_file_for_TSS_start,
            dtype=str,
            delimiter='\t')
        plus_strand = data[:, 4] == '+'
        data[plus_strand, 2] = data[plus_strand, 1].astype(int) + 1
        data[np.invert(plus_strand),
             1] = data[np.invert(plus_strand), 2].astype(int) - 1
        data = np.c_[data, range(len(data))]
        np.savetxt(name_of_file_for_overlap, data, fmt='%s', delimiter='\t')

    # Choose the region file to overlap against and the suffix used in all
    # output file names.
    if mode_atr2 == "TSS":
        name_of_file_for_overlap = save_to_temp_folder + config_variables.name_of_time_series_promoter_file_for_TSS_start[
            7:-3] + "_TSS_{0}_{1}".format(upstream_TSS, downstream_TSS)
        create_TSS_file(upstream_TSS, downstream_TSS)
        end_file_identifier = '{0}_{1}_{2}'.format(mode_atr2, upstream_TSS,
                                                   downstream_TSS)
    if mode_atr2 == "GENE":
        name_of_file_for_overlap = save_to_temp_folder + config_variables.name_of_time_series_promoter_file_for_TSS_start[
            7:-3] + "_GENE_{0}".format(upstream_TSS)
        create_GENE_file(upstream_TSS)
        end_file_identifier = '{0}_{1}'.format(mode_atr2, upstream_TSS)
    elif mode_atr2 == "ENHANCER":
        name_of_file_for_overlap = config_variables.name_of_enhancer_file_for_overlap
        end_file_identifier = '{0}'.format(mode_atr2)

    def create_enrichment_matrix():
        """Run windowBed per peak file and return ``(legend, count_matrix)``.

        ``legend`` holds the unique values of the second-to-last column of
        the written "enriched_peaks" file; ``count_matrix`` is a boolean
        array with one row per region and one column per legend entry.
        """
        # One (initially shared, never mutated in place) empty list per
        # region; entries are replaced by new lists below.
        motif_enrichments = [[]] * len(
            np.loadtxt(
                name_of_file_for_overlap,
                dtype=str))  # this needs to be changed here to gene or enhancer
        for name_of_file in name_of_files:
            name_of_file_ = pwd + "/" + hg + "/" + name_of_file
            command_line = "windowBed -a {0} -b {1} -sw -l {2} -r {3}".format(
                name_of_file_, name_of_file_for_overlap, upstream_TSS,
                downstream_TSS
            )  # change name_of_enhancer_file_for_overlap to TSS in the case of genes and add left/right
            args = shlex.split(command_line)
            proc = subprocess.Popen(args,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            output_raw = proc.stdout.read()
            if len(output_raw):
                # Split the tab-separated output into rows, dropping the
                # empty element after the final newline.
                output = np.array(
                    map(lambda x: x.split("\t"),
                        (output_raw).split("\n"))[:-1])
                np.savetxt(save_to_temp_folder + name_of_file[:-4] +
                           '_overlap_{0}'.format(end_file_identifier),
                           output,
                           fmt='%s',
                           delimiter='\t')
                # The last output column is the region index appended when
                # the overlap file was written; the first five columns
                # describe the overlapping peak.
                for index_of_peak, peak_overlap in zip(
                        output[:, -1].astype(int), output[:, :5]):
                    motif_enrichments[
                        index_of_peak] = motif_enrichments[index_of_peak] + [
                            list(peak_overlap[[0, 1, 2, 4]]) +
                            [peak_overlap[3].split("_")[-1]] +
                            [name_of_file.split("_")[1]] +
                            [name_of_file.split("_")[0]]
                        ]
        file_1 = open(
            save_to_temp_folder +
            "enriched_peaks_{0}".format(end_file_identifier), 'w')
        peaks = np.loadtxt(name_of_file_for_overlap, dtype=str)
        # One output line per (region, overlapping peak) pair.
        for index in [
                ind for ind, el in enumerate(motif_enrichments) if len(el)
        ]:
            array = motif_enrichments[index]
            for el in array:
                save = '\t'.join(np.r_[peaks[index], el])
                save += '\n'
                file_1.write(save)
        file_1.close()
        enriched_peaks = np.loadtxt(
            save_to_temp_folder +
            "enriched_peaks_{0}".format(end_file_identifier), str)
        legend = np.unique(enriched_peaks[:, -2])
        map_legend = {}
        for ind, el in enumerate(legend):
            map_legend[el] = ind
        count_matrix = np.zeros((len(motif_enrichments), len(legend)), bool)
        for el in enriched_peaks:
            # presumably el[-8] is the region index column written above
            # -- TODO confirm against the file layout
            count_matrix[int(el[-8]), map_legend[el[-2]]] = True
        np.save(
            save_to_temp_folder +
            "enrichment_matrix_{0}".format(end_file_identifier), count_matrix)
        return legend, count_matrix

    legend, count_matrix = create_enrichment_matrix()

    def sorts_labels():
        """Return ``(sorted_labels, sorted_counts, ind_sort)``.

        Labels are first ordered by decreasing cluster size and then
        re-ordered by ``ind_sort`` (see ``sorted_labels_func``).
        """
        labels_count = np.histogram(labels, bins=range(0,
                                                       max(labels) + 2))[0][1:]
        sorted_counts_labels = np.argsort(labels_count)[::-1]
        sorted_counts = labels_count[sorted_counts_labels]
        sorted_labels = np.unique(labels)[sorted_counts_labels]

        def sorted_labels_func():
            """Return the order to apply on top of the size ordering."""
            time_series_survived = np.loadtxt(path_to_R +
                                              merged_time_series_to_cluster,
                                              dtype=np.float,
                                              delimiter=",")
            means = []
            for ind, label in enumerate(sorted_labels):
                # mean profile of the cluster over columns 8.. or ..8
                if mode_atr == "SECOND_TS":
                    mean = (time_series_survived[label == labels, 8:]).mean(0)
                elif mode_atr == "FIRST_TS":
                    mean = (time_series_survived[label == labels, :8]).mean(0)
                means += [mean]
            means = np.array(means)
            # This lexicographic order is overwritten by both branches of
            # the sorted_mode check below.
            ind = np.lexsort(
                (means[:, 7], means[:, 6], means[:, 5], means[:, 4],
                 means[:, 3], means[:, 2], means[:, 1], means[:, 0]))
            # difference between mean column 4 and mean column 0
            amplitude = np.ravel(np.diff(means[:, [0, 4]]))
            #amplitude = means[:, 4]/means[:, 0]
            if sorted_mode == "amplitude_sorted":
                ind = np.argsort(amplitude)[::-1]
            elif sorted_mode == "size_sorted":
                # identity: keep the size ordering
                ind = np.arange(len(amplitude)).astype(int)
            return ind

        if diff_bind_version:
            ind_sort = [0, 1]
        else:
            ind_sort = sorted_labels_func()
        sorted_labels = sorted_labels[ind_sort]
        sorted_counts = sorted_counts[ind_sort]
        return sorted_labels, sorted_counts, ind_sort

    sorted_labels, sorted_counts, ind_sort = sorts_labels()

    def calculates_probabilities_for_cluster():
        """Binomial tail probabilities with the survived regions as background.

        Returns ``(prob, enrichments_counts)``, both indexed
        [cluster in sorted order, factor]; ``prob`` is also written to
        "_probabilities_of_enrichment_<id>" in the temp folder.
        """
        print count_matrix[survived].sum(0) / float(survived.shape[0])
        from scipy.stats import binom
        # background frequency of each factor among the survived regions
        ps = count_matrix[survived].sum(0) / float(survived.shape[0])
        prob = np.zeros((len(np.unique(labels)), len(ps)))
        enrichments_counts = prob.astype(int)
        #sorts
        for index_1, label in enumerate(sorted_labels):
            n = np.sum(labels == label)
            xs = count_matrix[survived][labels == label].sum(0)
            for index_2, p in enumerate(ps):
                p = ps[index_2]
                x = xs[index_2]
                # P(X >= x) for X ~ Binomial(n, p)
                prob[index_1, index_2] = 1. - binom.cdf(x - 1, n, p)
                enrichments_counts[index_1, index_2] = x
        np.savetxt(
            save_to_temp_folder +
            "_probabilities_of_enrichment_{0}".format(end_file_identifier),
            prob,
            delimiter="\t",
            fmt='%0.8f',
            header='\t'.join(legend))
        return prob, enrichments_counts

    if GLOBAL_OR_SURVIVED == "survived":
        prob, enrichments_counts = calculates_probabilities_for_cluster()

    def calculates_probabilities_for_cluster_global():
        """Same as above but with a global background frequency.

        In ENHANCER mode the background is computed over the regions not
        flagged in ``config_variables.proximal_enhancers_mask``; in GENE /
        TSS mode over all regions.
        """
        if mode_atr2 == "ENHANCER":
            distal_mask = np.invert(config_variables.proximal_enhancers_mask)
            print count_matrix[distal_mask].sum(0) / float(
                count_matrix[distal_mask].shape[0])
            ps = count_matrix[distal_mask].sum(0) / float(
                count_matrix[distal_mask].shape[0])
        elif mode_atr2 == "GENE" or mode_atr2 == "TSS":
            print count_matrix.sum(0) / float(count_matrix.shape[0])
            ps = count_matrix.sum(0) / float(count_matrix.shape[0])
        from scipy.stats import binom
        prob = np.zeros((len(np.unique(labels)), len(ps)))
        enrichments_counts = prob.astype(int)
        #sorts
        for index_1, label in enumerate(sorted_labels):
            n = np.sum(labels == label)
            xs = count_matrix[survived][labels == label].sum(0)
            for index_2, p in enumerate(ps):
                p = ps[index_2]
                x = xs[index_2]
                # P(X >= x) for X ~ Binomial(n, p)
                prob[index_1, index_2] = 1. - binom.cdf(x - 1, n, p)
                enrichments_counts[index_1, index_2] = x
        np.savetxt(
            save_to_temp_folder +
            "_probabilities_of_enrichment_{0}".format(end_file_identifier),
            prob,
            delimiter="\t",
            fmt='%0.8f',
            header='\t'.join(legend))
        return prob, enrichments_counts

    if GLOBAL_OR_SURVIVED == "global":
        prob, enrichments_counts = calculates_probabilities_for_cluster_global(
        )

    # Factors listed in dont_plot are masked out everywhere below.
    mask_legend = np.ones_like(legend).astype(bool)
    mask_legend[np.in1d(legend, dont_plot)] = False

    # One line per cluster: comma-separated factors with prob < 0.01.
    file1 = open(
        save_to_temp_folder + merged_time_series_to_cluster +
        "_enrichment_{0}".format(end_file_identifier), "w")
    for i in range(len(prob)):
        file1.write(','.join(legend[(prob[i] < 0.01) * mask_legend]) + "\n")
    file1.close()

    from matplotlib import pyplot as plt
    time_series_survived = np.loadtxt(path_to_R +
                                      merged_time_series_to_cluster,
                                      dtype=np.float,
                                      delimiter=",")
    # Per-cluster amplitude: mean column 4 minus mean column 0.
    amplitude = np.zeros(len(sorted_labels))
    for ind, label in enumerate(sorted_labels):
        if mode_atr == "SECOND_TS":
            mean = (time_series_survived[label == labels, 8:]).mean(
                0
            )  # this needs to be fixed here if you want to add clustering for genes
        elif mode_atr == "FIRST_TS":
            mean = (time_series_survived[label == labels, :8]).mean(0)
        #amplitude[ind] = mean[4]/mean[0]#np.diff(mean[[0,4]])
        amplitude[ind] = np.diff(mean[[0, 4]])

    # Table data: probabilities per factor plus "Count" and "Amplitude".
    idx = Index(np.unique(labels))
    df = DataFrame(np.c_[prob[:, mask_legend], sorted_counts[:, None],
                         amplitude],
                   index=idx,
                   columns=np.r_[legend[mask_legend], ["Count"],
                                 ["Amplitude"]])
    vals = np.around(df.values, 2)
    normal = plt.Normalize(prob[:, mask_legend].min(),
                           prob[:, mask_legend].max())
    rise = np.zeros_like(vals).astype(bool)
    rise[:, :] = (amplitude > 0)[:, None]
    # Text shown in the cells: overlap counts, cluster size, and the
    # amplitude multiplied by 100 and truncated to int.
    vals_enrich = np.c_[enrichments_counts[:, mask_legend],
                        sorted_counts[:, None],
                        (100 * amplitude[:, None]).astype(int)]
    matrix_colour = plt.cm.hot(normal(vals))

    # Boolean masks over the table cells.  The two extra all-True columns
    # cover "Count" and "Amplitude"; those columns are recoloured
    # separately further down.
    mask_encriched = np.c_[prob[:, mask_legend] < 0.01,
                           np.ones((len(prob), 2), bool)]
    # NOTE(review): the threshold here is 0.05 while the legend entry
    # drawn for this shade says "p < 0.005" and the depleted counterpart
    # uses 0.995 -- possibly a typo for 0.005; confirm.
    mask_encriched_2 = np.c_[prob[:, mask_legend] < 0.05,
                             np.ones((len(prob), 2), bool)]
    mask_encriched_3 = np.c_[prob[:, mask_legend] < 0.001,
                             np.ones((len(prob), 2), bool)]
    #matrix_colour[mask_encriched*rise] = np.array([0.0, 0.5019607843137255, 0.0, 0.6])
    #matrix_colour[mask_encriched*np.invert(rise)] = np.array([0.0, 0.5019607843137255, 0.0, 0.3])
    mask_depleted = np.c_[prob[:, mask_legend] > 0.99,
                          np.ones((len(prob), 2), bool)]
    mask_depleted_2 = np.c_[prob[:, mask_legend] > 0.995,
                            np.ones((len(prob), 2), bool)]
    mask_depleted_3 = np.c_[prob[:, mask_legend] > 0.999,
                            np.ones((len(prob), 2), bool)]
    white = [1., 1., 1., 1.]
    mask_niether = np.invert(mask_encriched + mask_depleted)
    #matrix_colour[mask_depleted*rise] = np.array([0.768, 0.090, 0.090, 0.3])
    #matrix_colour[mask_depleted*np.invert(rise)] = np.array([0.768, 0.090, 0.090, 0.6])

    # Assignment order matters: later masks overwrite earlier ones, and
    # the "neither" grey is applied last to every cell that is neither
    # < 0.01 nor > 0.99.
    matrix_colour[mask_depleted] = [0.862745, 0.0784314, 0.235294, 0.7]
    matrix_colour[mask_depleted_2] = [0.862745, 0.0784314, 0.235294, 0.85]
    matrix_colour[mask_depleted_3] = [0.862745, 0.0784314, 0.235294, 1.]
    matrix_colour[mask_encriched] = [0.180392, 0.545098, 0.341176,
                                     .7]  #[0., 1., 1., 1.]
    matrix_colour[mask_encriched_2] = [0.180392, 0.545098, 0.341176,
                                       .85]  #[0., 1., 1., 1.]
    matrix_colour[mask_encriched_3] = [0.180392, 0.545098, 0.341176,
                                       1.]  #[0., 1., 1., 1.]
    matrix_colour[mask_niether] = [0.815, 0.803, 0.803, 1.]
    # "Count" column is white; "Amplitude" column uses the bwr_r colormap
    # normalised to the observed amplitude range.
    matrix_colour[:, -2] = white
    normal_2 = plt.Normalize(amplitude.min(), amplitude.max())
    amplitude_column = plt.cm.bwr_r(normal_2(amplitude))
    matrix_colour[:, -1] = amplitude_column
    #matrix_colour[rise[:,0], -1] = np.array([0.0, 0.5019607843137255, 0.0, 0.6])
    #matrix_colour[np.invert(rise[:,0]), -1] = np.array([0.768, 0.090, 0.090, 0.6])

    fig = plt.figure(figsize=(15, 11))
    ax = fig.add_subplot(111, frameon=True, xticks=[], yticks=[])
    # Mappable used only to draw the colorbar further down.
    sm = plt.cm.ScalarMappable(cmap="bwr_r",
                               norm=plt.Normalize(vmin=-1, vmax=1))
    sm._A = []

    # NOTE(review): with diff_bind_version set, ind_sort is a plain list,
    # so ``ind_sort + 1`` would raise TypeError -- confirm.
    the_table = plt.table(cellText=vals_enrich,
                          rowLabels=ind_sort + 1,
                          colLabels=df.columns,
                          colWidths=[0.06] * vals.shape[1],
                          rowLoc='right',
                          loc='center left',
                          cellColours=matrix_colour)  #rowLabels=df.index
    import matplotlib.patches as mpatches

    # Empty line plots that only exist to create the legend entries
    # (earlier variants that labelled rise/drop separately were disabled).
    line2a, = plt.plot([], [],
                       label="enriched, p < 0.01",
                       linewidth=15,
                       color=[0.180392, 0.545098, 0.341176,
                              0.7])  #[0., 1., 1., 1.]
    line2b, = plt.plot([], [],
                       label="enriched, p < 0.005",
                       linewidth=15,
                       color=[0.180392, 0.545098, 0.341176, 0.85])
    line2c, = plt.plot([], [],
                       label="enriched, p < 0.001",
                       linewidth=15,
                       color=[0.180392, 0.545098, 0.341176, 1.])
    line3a, = plt.plot([], [],
                       label="depleted, p < 0.01",
                       linewidth=15,
                       color=[0.862745, 0.0784314, 0.235294, 0.7])
    line3b, = plt.plot([], [],
                       label="depleted, p < 0.005",
                       linewidth=15,
                       color=[0.862745, 0.0784314, 0.235294, 0.85])
    line3c, = plt.plot([], [],
                       label="depleted, p < 0.001",
                       linewidth=15,
                       color=[0.862745, 0.0784314, 0.235294, 1.])
    line1, = plt.plot([], [],
                      label="neither",
                      linewidth=15,
                      color=[0.815, 0.803, 0.803, 1.])

    #fig.patch.set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)

    # Make every table cell 20 % taller.
    table_props = the_table.properties()
    table_cells = table_props['child_artists']
    for cell in table_cells:
        cell.set_height(1.2 * cell.get_height())
    #plt.legend()
    first_legend = plt.legend(bbox_to_anchor=(0.96, 1))
    ax = plt.gca().add_artist(first_legend)

    # Colorbar explaining the "Amplitude" column colours.
    cbaxes = fig.add_axes([0.685, 0.45, 0.025, 0.2])
    cb = fig.colorbar(sm, cax=cbaxes, ticks=[-1, 0, 1])  #, shrink=2.)
    cb.ax.set_yticklabels(
        ['drops between 0-40min', 'stationary', 'rises between 0-40min'])

    if diff_bind_version:
        name_save = '{0}TF_enrichment_{1}.pdf'.format(save_to_temp_folder,
                                                      "diff_bind")
    else:
        name_save = '{0}TF_enrichment_{1}_{2}_{3}_{4}_0_40.pdf'.format(
            save_to_temp_folder, end_file_identifier, mode_atr, sorted_mode,
            mode_of_data_sets)
    pdf = PdfPages(name_save)
    pdf.savefig()
    pdf.close()
    plt.close('all')
def ozan_vis(self, iterations):
    """Save true, reconstructed and FBP images, each with a probability table.

    For every iteration one sample is drawn with
    ``self.simulated_measurements(1)`` and ``self.sess.run`` evaluates
    ``self.ohl``, ``self.result``, ``self.probabilities`` and
    ``self.fbp_probabilities`` on it.  Three PNG files are then written to
    ``Data/Evaluations/``, named ``<model_name>_True_<i>.png``,
    ``<model_name>_Reconstruction_<i>.png`` and ``<model_name>_FBP_<i>.png``.
    Each file shows the first image of the batch in grayscale with a
    one-column table underneath.

    Args:
        iterations: Number of samples to draw and save.
    """
    # The trailing comma matters: ``('Probability')`` is a plain string, and
    # the table would index it per column, labelling the header "P".
    columns = ('Probability',)
    rowLabels = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

    for i in range(iterations):
        x_ini_np, x_true_np, y_np, lab_np = self.simulated_measurements(1)
        labels, output_pic, output_labels, fbp_clas = self.sess.run(
            [
                self.ohl, self.result, self.probabilities,
                self.fbp_probabilities
            ],
            feed_dict={
                self.x_ini: x_ini_np,
                self.x_true: x_true_np,
                self.y: y_np,
                self.labels: lab_np
            })

        # (figure number, image batch, per-class values, file-name tag)
        panels = (
            (1, x_true_np, labels, '_True_'),
            (2, output_pic, output_labels, '_Reconstruction_'),
            (3, x_ini_np, fbp_clas, '_FBP_'),
        )
        for fig_num, image, values, tag in panels:
            # plt.table expects a 2-D cellText: one single-cell row per value.
            cell_text = [[value] for value in values[0]]

            plt.figure(fig_num)
            plt.imshow(image[0, ..., 0], cmap='gray')
            plt.axis('off')
            # Add a table at the bottom of the axes
            plt.table(cellText=cell_text,
                      rowLabels=rowLabels,
                      colLabels=columns,
                      loc='bottom')
            plt.savefig('Data/Evaluations/' + self.model_name + tag + str(i) +
                        '.png',
                        bbox_inches='tight')
            plt.close()
if __name__ == '__main__':
    # Render the NIQE statistics stored in result.csv as a table-only figure:
    # one table row per CSV record, labelled with the record's 'name' field.
    columns = ['niqe_mean', 'niqe_std']
    data = []
    rows = []
    with open("result.csv") as f:
        for record in csv.DictReader(f):
            rows.append(record['name'])
            data.append(record)

    # Every statistic is shown with three decimals.
    cell_text = [[f'{float(record[col]):.3f}' for col in columns]
                 for record in data]

    the_table = plt.table(cellText=cell_text,
                          rowLabels=rows,
                          colLabels=columns,
                          loc='left',
                          cellLoc='center',
                          rowLoc='center')

    # Adjust layout to make room for the table, then hide the empty axes.
    plt.subplots_adjust(left=0.6, bottom=0.2)
    plt.axis('off')
    plt.show()
def test_non_square():
    """Check that a table built only from a non-square ``cellColours`` works."""
    plt.table(cellColours=['b', 'r'])
] table_vals_1 = recallmx table_vals_2 = precisionmx # 第一行第一列图形 #ax1 = plt.subplot(1, 2, 1) # 第一行第二列图形 #ax2 = plt.subplot(1, 2, 2) plt.figure(1) my_table_1 = plt.table(cellText=table_vals_1, colWidths=[0.111] * 10, rowLabels=row_labels, colLabels=col_labels, loc='best') #plt.sca(ax1) plt.axis('off') plt.title('recall') plt.plot() plt.show() plt.figure(2)
['0.6698717948717948', '1.0', '0.8023032629558542', '209'], ['0.6439232409381663', '0.766497461928934', '0.6998841251448435', '394'], ['0.4821917808219178', '0.6048109965635738', '0.5365853658536586', '291'], ['0.6576923076923077', '0.6951219512195121', '0.6758893280632411', '246'], ['0.8994082840236687', '0.8760806916426513', '0.8875912408759125', '347'], ['0.7724550898203593', '0.7865853658536586', '0.7794561933534744', '164'], ['0.2159090909090909', '0.3958333333333333', '0.27941176470588236', '144'], [ '0.47101449275362317', '0.5284552845528455', '0.49808429118773945', '246' ], ['0.7701863354037267', '0.5', '0.6063569682151588', '248'], ['0.7056737588652482', '0.7481203007518797', '0.7262773722627737', '266'], ['0.6666666666666666', '0.6358381502890174', '0.650887573964497', '346'], ['0.6196581196581197', '0.7038834951456311', '0.6590909090909092', '206'], ['0.774074074074074', '0.7827715355805244', '0.7783985102420856', '267'], ['0.8583333333333333', '0.9307228915662651', '0.8930635838150288', '332'] ] the_table = plt.table(cellText=data, colWidths=[0.1] * (len(col_label) + 1), rowLabels=row_label, colLabels=col_label, loc='center right') the_table.auto_set_font_size(False) the_table.set_fontsize(12) the_table.scale(2, 1) ax.axis('off') ax.axis('tight') plt.show()
('%.2f' % (np.mean(np.array(y_c1)))), ('%.2f' % (np.mean(np.array(y_c2)))), \ ('%.2f' % (np.mean(np.array(y_d1)))), ('%.2f' % (np.mean(np.array(y_d2))))]) col_value.append([('%.2f' % (np.mean(np.array(y_a)))), ('%.2f' % (np.std(np.array(y_a)))), ('%.2f' % (np.mean(np.array(y_b)))), ('%.2f' % (np.std(np.array(y_b)))), \ ('%.2f' % (np.mean(np.array(y_c1)))), ('%.2f' % (np.std(np.array(y_c1)))), ('%.2f' % (np.mean(np.array(y_c2)))), ('%.2f' % (np.std(np.array(y_c2)))), \ ('%.2f' % (np.mean(np.array(y_d1)))), ('%.2f' % (np.std(np.array(y_d1)))), ('%.2f' % (np.mean(np.array(y_d2)))), ('%.2f' % (np.std(np.array(y_d2))))]) plt.subplots_adjust(wspace=0.3, hspace=0.3) plt.figure(num+1) table_vals = [] tmp = [] for col in mean_value: print(np.array(col)) # for col in col_value: # print(np.array(col)) for i in range(0,len(col_value[0])): for col in col_value: tmp.append(col[i]) table_vals.append(tmp) # print(tmp) tmp = [] print(table_vals) print(col_labels) print(row_labels) my_table = plt.table(cellText=table_vals, colWidths=[0.2]*num, \ rowLabels=row_labels, colLabels=col_labels, \ loc='best') my_table.set_fontsize(20) plt.axis('off') # plt.show()
try: sys.argv[4] # Parameter which is entered by user except Exception as e: showTable = True # default display Table else: showTable = False if sys.argv[ 4] == '1': # if user enter 1 display table else dont display the table showTable = True plt.xticks(x, my_xticks, fontsize=numberFontSize) if showTable: # True display Table # First Table start the_table = plt.table(cellText=y, colLabels=my_xticks, loc='bottom', colLoc='right', rowLoc='left') the_table.set_fontsize(numberFontSize) the_table.scale(1, 1) #Remove Border of table 1 cell for key, cell in the_table.get_celld().items(): cell.set_linewidth(0) # First Table end # right side table of company name start my_xticks_1 = [titleName] legendLabel_1 = np.reshape(legendLabel, (-1, 1))
# 5 table
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    import numpy as np

    fig = plt.figure(figsize=(9, 9), facecolor='white')
    ax = fig.add_axes([0.00, 0.00, 1, 1], facecolor='white', zorder=0)

    # ------------------------- build a table -------------------------
    # Variant 1: build it from cellText, a matrix holding the cell texts.
    # The same inner list is shared by all 51 rows; that is fine here
    # because nothing mutates it afterwards.
    cell_text = [['●'] * 51] * 51  # numeric values work as well
    table = plt.table(
        cellText=cell_text,
        cellLoc='center',
        # Optional arguments that can be enabled for experiments:
        # colWidths=[0.0196,]*10,
        # rowLabels=['1']*4, rowColours=['red']*4, rowLoc='left',
        # colLabels=['2']*6, colColours=['yellow']*6, colLoc='center',
        loc='center'  # where the table is placed inside the axes
    )
    fig.show()

    # Variant 2: build it from cellColours only.
    # The original code ran ``del fig, ax, table`` and then called
    # ``fig.show()``, which raised NameError.  Use a fresh figure and axes
    # instead so the second table gets its own canvas.
    fig = plt.figure(figsize=(9, 9), facecolor='white')
    ax = fig.add_axes([0.00, 0.00, 1, 1], facecolor='white', zorder=0)
    cell_color = [
        ['yellow'] * 3,
        ['red'] * 3,
        ['green'] * 3,
    ]
    table2 = plt.table(cellColours=cell_color, cellLoc='center', loc='center')
    fig.show()
def main():
    """Compare the ``*_out_method3.csv`` runs with the baseline and plot them.

    Reads ``base_out.csv``, ``1_out_method3.csv`` to ``8_out_method3.csv``,
    ``runtime_method3.csv`` and ``cpuUtilisation_method3.csv`` from the
    working directory.  It writes ``plot1.png`` to ``plot4.png`` plus
    ``table1.png`` and ``table2.png``, and shows every figure.
    """
    baseline = np.array(pd.read_csv("base_out.csv", header=None))

    th = []  # thread counts
    q = []  # mean squared error of column 1 ("queue density")
    d = []  # mean squared error of column 2 ("dynamic density")
    for threads in range(1, 9):
        output = np.array(
            pd.read_csv(str(threads) + "_out_method3.csv", header=None))
        queue_total = 0.0
        dynamic_total = 0.0
        for idx in range(len(output)):
            queue_diff = baseline[idx][1] - output[idx][1]
            dynamic_diff = baseline[idx][2] - output[idx][2]
            queue_total = queue_total + queue_diff * queue_diff
            dynamic_total = dynamic_total + dynamic_diff * dynamic_diff
        th.append(threads)
        q.append(queue_total / len(output))
        d.append(dynamic_total / len(output))

    runtime = np.array(pd.read_csv("runtime_method3.csv", header=None))
    cpu = np.array(pd.read_csv("cpuUtilisation_method3.csv", header=None))
    ti = [runtime[idx][1] for idx in range(len(cpu))]
    cp = [cpu[idx][1] for idx in range(len(cpu))]

    def save_table(cell_rows, headers, size, filename):
        # Draw a table on the current axes, hide the axes, save and show.
        table = plt.table(cellText=cell_rows, colLabels=headers, loc='center')
        axes = plt.gca()
        axes.get_xaxis().set_visible(False)
        axes.get_yaxis().set_visible(False)
        plt.box(on=None)
        table.scale(1, 1.5)
        plt.gcf().set_size_inches(*size)
        plt.savefig(filename, dpi=200)
        plt.show()

    def save_utility_plot(xs, xlabel, filename):
        # Plot both utility series against ``xs``, save and show.
        plt.figure()
        plt.xlabel(xlabel)
        plt.ylabel("Utility Percentage")
        plt.plot(xs, q, label="Queue Density utility percentage", marker='o')
        plt.plot(xs,
                 d,
                 label="Dynamic Density utility percentage",
                 marker='o')
        plt.legend()
        plt.grid()
        plt.gcf().set_size_inches(8, 6)
        plt.savefig(filename, dpi=200)
        plt.show()

    # Plot 1: error against thread count.
    plt.figure()
    plt.xlabel("Number of threads")
    plt.ylabel("Avgerage squared error")
    plt.plot(th, q, label="Queue Density error", marker='o')
    plt.plot(th, d, label="Dynamic Density error", marker='o')
    plt.legend()
    plt.grid()
    plt.savefig("plot1.png", dpi=200)
    plt.show()

    # Plot 2: runtime and CPU utilisation on twin y axes.
    fig, ax = plt.subplots()
    ax.set_xlabel("Number of threads")
    ax.set_ylabel("Runtime(seconds)")
    ln1 = ax.plot(th, ti, label="Runtime(seconds)", color="red", marker='o')
    ax2 = ax.twinx()
    ax2.set_ylabel("CPU Utilisation %")
    ln2 = ax2.plot(th,
                   cp,
                   label="Percentage of cpu utilised by the program",
                   color="blue",
                   marker='o')
    lns = ln1 + ln2
    ax.legend(lns, [line.get_label() for line in lns], loc=5)
    plt.grid()
    plt.savefig("plot2.png", dpi=200)
    plt.show()

    # Table 1: raw measurements per thread count.
    save_table(
        [[threads, ti[idx], cp[idx], q[idx], d[idx]]
         for idx, threads in enumerate(th)],
        [
            'Number of threads', 'Runtime(s)', 'CPU Utilisation(%)',
            'Queue Density Error', 'Dynamic Density Error'
        ],
        (11, 7),
        "table1.png",
    )

    # Convert the errors to a utility percentage: 100 / (1 + error).
    q = [100.0 / (1.0 + err) for err in q]
    d = [100.0 / (1.0 + err) for err in d]

    # Plots 3 and 4: utility against thread count and against runtime.
    save_utility_plot(th, "Number of threads", "plot3.png")
    save_utility_plot(ti, "Runtime (seconds)", "plot4.png")

    # Table 2: utility per thread count.
    save_table(
        [[threads, ti[idx], q[idx], d[idx]]
         for idx, threads in enumerate(th)],
        [
            'Number of threads', 'Runtime(sec)', 'Queue Density Utility(%)',
            'Dynamic Density Utility(%)'
        ],
        (10, 7),
        "table2.png",
    )
ax2.set_xlim([0, np.e])
ax2.set_ylabel('Y values for ln(x)')
ax2.set_xlabel('Same X for both exp(-x) and ln(x)')
plt.show()

# hist: two overlapping normalised histograms with a small table below them
mu = 100  # mean of distribution
sigma = 15  # standard deviation of distribution
x1 = mu + sigma * np.random.randn(10000)
x2 = mu + 50 + sigma * np.random.randn(10000)
num_bins = 50
# ``normed`` was removed from plt.hist; ``density=True`` is its replacement
# and normalises the histogram in the same way.
n1, bins1, patches1 = plt.hist(x1,
                               num_bins,
                               density=True,
                               facecolor='green',
                               alpha=0.3,
                               histtype='stepfilled')
# NOTE(review): the second call overwrites n1/bins1/patches1 from the first
# one; kept as-is because later code may rely on these names.
n1, bins1, patches1 = plt.hist(x2,
                               num_bins,
                               density=True,
                               facecolor='red',
                               alpha=0.3,
                               histtype='stepfilled')
# bbox is [left, bottom, width, height] in axes coordinates; the negative
# bottom value places the table underneath the axes.
plt.table(cellText=[['a', 'b', 'c'], [1, 2, 3]],
          rowLabels=['1 row', '2 row'],
          colLabels=['1 col', '2 col', '3 col'],
          loc='bottom',
          bbox=[0, -0.25, 1, 0.15])
# adding text, legends, table ....
col_labels = ['mean', 'std']
row_labels = ['ICTON', 'ICTONwT', '41-to-40', '42-to-40']
# One [mean, std] row per data series, rounded to four decimals; the series
# order matches row_labels.
table_vals = [[round(np.mean(series), 4),
               round(np.std(series), 4)]
              for series in (io, ic, n4140, n4240)]

# Draw the table with a fixed font size, enlarged four times in both
# directions.
the_table = plt.table(cellText=table_vals,
                      colWidths=[0.1] * 3,
                      rowLabels=row_labels,
                      colLabels=col_labels,
                      loc='center')
the_table.auto_set_font_size(False)
the_table.set_fontsize(20)
the_table.scale(4, 4)

# Hide every tick mark and tick label so that only the table is visible.
plt.tick_params(axis='x',
                which='both',
                bottom=False,
                top=False,
                labelbottom=False)
plt.tick_params(axis='y',
                which='both',
                right=False,
                left=False,
                labelleft=False)
def MWT(points, n):
    """Animate a dynamic-programming table fill over a polygon and draw the result.

    The polygon outline is plotted first.  Then ``table[i][j]`` is filled
    diagonal by diagonal (``gap = j - i``) with the smallest value of
    ``table[i][k] + table[k][j] + cost(points, i, j, k)`` over ``i < k < j``,
    and ``ktable[i][j]`` records the ``k`` that achieved it.  Both tables are
    redrawn with ``plt.table`` while they are filled.  Finally ``draw`` is
    called with the finished tables.

    NOTE(review): the name suggests "minimum weight triangulation"; ``cost``
    and ``draw`` are defined outside this block, so confirm against them.

    Args:
        points: Sequence of vertices; ``points[m][0]`` and ``points[m][1]``
            are used as the x and y coordinates.
        n: Number of vertices to process.  The tables are sized with
            ``len(points)``, so this presumably equals ``len(points)``.

    Returns:
        ``0`` when ``n < 3``; otherwise nothing is returned explicitly.
    """
    # Plot every vertex with its index and the edge to the next vertex.
    for m in range(n):
        x = points[m][0]
        y = points[m][1]
        plt.plot(x, y, 'bo')
        plt.text(x * (1 + 0.01), y * (1 + 0.01), m, fontsize=10)
        # For the last vertex switch to index -1 so that ``m + 1`` is 0 and
        # the edge closes the polygon.
        if m == n - 1:
            m = -1
        x, y = [points[m][0], points[m + 1][0]], [points[m][1], points[m + 1][1]]
        plt.plot(x, y, marker='o')
        plt.draw()
    if n < 3:
        return 0
    columns = [x for x in range(len(points))]
    rows = [x for x in range(len(points))]
    n_rows = len(points)
    # table holds the best value per (i, j); ktable the k that produced it.
    table = []
    ktable = []
    for row in range(n_rows):
        table.append([math.inf] * len(points))
        ktable.append([-1] * len(points))
    # Walk the diagonals: gap is the distance between i and j.
    gap = 0
    while gap < n:
        i = 0
        j = gap
        while j < n:
            if j < (i + 2):
                # Fewer than three vertices between i and j: value is 0.
                table[i][j] = 0
                the_table = plt.table(cellText=table, rowLabels=rows, colLabels=columns, loc='bottom')
                # Row index is shifted by one because row 0 of the rendered
                # table holds the column labels.
                the_table._cells[(i + 1, j)].set_facecolor("#56b5fd")
            else:
                table[i][j] = math.inf
                k = i + 1
                while k < j:
                    val = int(
                        round(table[i][k] + table[k][j] + cost(points, i, j, k)))
                    if table[i][j] > val:
                        table[i][j] = val
                        ktable[i][j] = k
                    # Redraw both tables: blue marks the cell being filled,
                    # red marks the two cells it is combined from.
                    the_table = plt.table(cellText=table, rowLabels=rows, colLabels=columns, loc='bottom')
                    the_table._cells[(i + 1, j)].set_facecolor("#56b5fd")
                    the_table._cells[(i + 1, k)].set_facecolor("red")
                    the_table._cells[(k + 1, j)].set_facecolor("red")
                    the_ktable = plt.table(cellText=ktable, rowLabels=rows, colLabels=columns, loc='top')
                    the_ktable._cells[(i + 1, j)].set_facecolor("#56b5fd")
                    plt.draw()
                    plt.pause(0.01)
                    k = k + 1
            i = i + 1
            j = j + 1
        gap = gap + 1
    # Replace the cells that were never filled (still infinite) with None
    # before the final rendering.
    for a in range(0, n):
        for b in range(0, n):
            if table[a][b] == math.inf:
                table[a][b] = None
    # NOTE(review): p is built but not used anywhere in this function.
    p = []
    for x in range(n):
        p.append(x)
    j = n - 1
    the_table = plt.table(cellText=table, rowLabels=rows, colLabels=columns, loc='bottom')
    the_ktable = plt.table(cellText=ktable, rowLabels=rows, colLabels=columns, loc='top')
    # Hand the finished tables and their artists to draw(), starting from the
    # full range (0, n - 1) and its recorded split point.
    draw(0, j, int(round(ktable[0][n - 1])), table, the_table, ktable, the_ktable, points)
def plot_docs_distribution(df,
                           col_name,
                           col_score_name,
                           num_topics,
                           topic_labels,
                           trow_label,
                           title,
                           width=0.3,
                           tscale_x=1.5,
                           tscale_y=2,
                           num_cuts=4,
                           pad=90,
                           table_vals=[[
                               'Below 0.25', '0.25 to 0.5', '0.5 to 0.75',
                               '0.75 or Above'
                           ]],
                           bins=[0, 0.25, 0.5, 0.75, 1],
                           bin_labels=['Very Low', 'Low', 'Medium', 'High']):
    """
    Create a bar chart of documents distribution per score range.

    The figure is saved as ``images/dist_per_<col_score_name>`` (spaces
    replaced by underscores) and then shown.

    Args:
        df: a data frame
        col_name: df's column name of class label
        col_score_name: df's column name of scores
        num_topics: number of topics/clusters
        topic_labels: class labels to plot; the first ``num_topics`` entries
            are used, one bar group each
        trow_label: row labels for data table
        title: plot's title
        width: bar's width
        tscale_x: data table's scaled value for width
        tscale_y: data table's scaled value for height
        num_cuts: number of cuts for bin
        pad: the padding of title above the plot
        table_vals: cell texts of the data table, one list per table row
        bins: a list of score's range
        bin_labels: a list of labels for score ranges

    The list defaults are only read, never mutated, so sharing them between
    calls is harmless.
    """
    # plot number of documents per score range
    fig = plt.figure(figsize=(12, 7))
    ax = fig.add_subplot(111)
    ind = np.arange(num_cuts)  # the x locations for the groups

    # get the counts of documents
    class_counts = dict(df[col_name].value_counts())

    # plot the bars
    for i in range(num_topics):
        topic_label = topic_labels[i]  # model topic's label
        # get scores
        m_class = df[df[col_name] == topic_label][col_score_name]
        # assign scores to bin
        counts = np.histogram(m_class, bins=bins)[0]
        # plot the bars, shifted by one bar width per topic
        rects = ax.bar(ind + (width * i),
                       counts,
                       width=width,
                       align='center',
                       label=col_name + ' ' + topic_label + ' (' +
                       str(class_counts[topic_label]) + ')')
        # put value on top of each bar
        for rect in rects:
            h = rect.get_height()
            if h > 0:
                ax.text(rect.get_x() + rect.get_width() / 2.,
                        1.01 * h,
                        '%d' % int(h),
                        ha='center',
                        va='bottom')

    # show data table
    # Colours and widths follow the shape of table_vals instead of being
    # hard-coded for a 1x4 table, so custom table_vals / bin_labels work.
    # For the default arguments the result is unchanged.
    n_cols = max((len(row) for row in table_vals), default=0)
    cell_colors = [['lightblue'] * len(row) for row in table_vals]
    table = plt.table(cellText=table_vals,
                      cellColours=cell_colors,
                      colWidths=[0.1] * max(6, n_cols),
                      rowLabels=trow_label,
                      colLabels=bin_labels,
                      rowColours=['lightblue'] * len(table_vals),
                      loc='top')
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(tscale_x, tscale_y)

    # adjust layout to make room for the table:
    plt.subplots_adjust(left=0.2, bottom=0.2)

    # hide top, right and left border
    for loc in ['top', 'right', 'left']:
        plt.gca().spines[loc].set_visible(False)

    ax.set_xticks(ind + width)
    ax.set_xticklabels(tuple(bin_labels))
    ax.get_yaxis().set_visible(False)
    plt.legend(frameon=False)
    plt.title(title,
              fontsize=15,
              verticalalignment='top',
              pad=pad,
              fontweight='bold')
    plt.savefig('images/dist_per_' + col_score_name.replace(' ', '_'))
    plt.show()
def printPath(pathtemp, fun, num):
    """Plot measured data with fitted curves and show the fit parameters as a table.

    The upper subplot shows, for every file in the fit-result directory, the
    raw data read from the same-named file in ``pathtemp + "处理后的原始数据/"``
    together with the curve rebuilt from line ``num`` of the fit-result file.
    The lower subplot shows the collected parameters as a table.

    Args:
        pathtemp: Path prefix; directory names are appended by plain string
            concatenation, so it presumably has to end with a path separator.
        fun: Integer selecting the fit-result directory and the formula used
            for the curve (1 to 6, see the branches below).
        num: Index of the line taken from each fit-result file.

    NOTE(review): latent issues visible in this block, left unchanged here:
      * ``flmain2`` is only assigned for ``fun`` 2, 5 and 6, but it is always
        used as the curve label, so other values fail on the first file.
      * ``rawf`` and ``f`` are opened without being closed.
      * ``"${\gamma}$"`` contains the invalid escape sequence ``\g``.
      * ``colorlist`` has five entries, so a sixth file raises IndexError.
    """
    plt.figure(figsize=(10, 10))
    plt.subplot(2, 1, 1)
    ylist = []
    yfitlist = []
    colors = []
    paras = []
    col_labels = []
    row_labels = []
    colorlist = ['black', 'red', 'blue', 'yellow', 'green']
    path = pathtemp + "处理后的原始数据/"
    # All sub-directories found while scanning
    dirList = []
    # All regular files of the raw-data directory
    fileList = []
    # Names of the entries in the raw-data directory
    files = os.listdir(path)
    for f in files:
        if (os.path.isdir(path + '/' + f)):
            # Skip hidden directories, there are too many of them
            if (f[0] == '.'):
                pass
            else:
                # Record non-hidden directories
                dirList.append(f)
        if (os.path.isfile(path + '/' + f)):
            # Record files
            fileList.append(f)
    # The plot title is built from the first two whitespace-separated parts
    # of the first raw-data file name.
    titletext = fileList[0].split()
    title = titletext[0] + titletext[1] + "实验数据图"
    colornum = 0
    dt = 0

    # Fitted curves: pick the result directory (and, for some modes, the
    # header row of the parameter table) according to ``fun``.
    fitingpath = ""
    spotdatapath = pathtemp + "处理后的原始数据/"
    if (fun == 1):
        fitingpath = pathtemp + "拟合结果/"
    if (fun == 2):
        fitingpath = pathtemp + "指数拟合结果/"
        paras.append(["测量点", "${I_0}$", "${\\tau}$", "D", "${R^2}$"])
    if (fun == 3):
        fitingpath = pathtemp + "拟合结果/"
    if (fun == 4):
        fitingpath = pathtemp + "双曲线拟合结果/"
        paras.append([
            "测量点", "${I_0}$", "s2", "${\\tau}$", "${\gamma}$", "D", "${R^2}$"
        ])
    if (fun == 5):
        fitingpath = pathtemp + "指数积分形式拟合结果/"
        paras.append(["测量点", "${I_0}$", "${\\tau}$", "D", "${R^2}$"])
    if (fun == 6):
        fitingpath = pathtemp + "双曲线积分形式拟合结果/"
        paras.append([
            "测量点", "${I_0}$", "s2", "${\\tau}$", "${\gamma}$", "D", "${R^2}$"
        ])
    files = os.listdir(fitingpath)
    # Collect the files of the fit-result directory
    fileList2 = []
    for f in files:
        if (os.path.isdir(fitingpath + '/' + f)):
            # Skip hidden directories, there are too many of them
            if (f[0] == '.'):
                pass
            else:
                # Record non-hidden directories
                dirList.append(f)
        if (os.path.isfile(fitingpath + '/' + f)):
            # Record files
            fileList2.append(f)
    colornum = 0
    for fl in fileList2:
        # Read the raw data: columns 0 and 1 are x and y, column 2 is kept
        # as dt (the value from the last line wins).
        x = []
        y = []
        rawf = open(spotdatapath + fl)
        alllines = rawf.readlines()
        for eachLine in alllines:
            eachdata = eachLine.split()
            x.append(float(eachdata[0]))
            y.append(float(eachdata[1]))
            dt = float(eachdata[2])
        color = colorlist[colornum]
        if (fun == 4 or fun == 2):
            # Label: second '-'-separated part of the file name with all
            # ASCII letters removed (naming used by Prof. Zhang's data; an
            # alternative labelling existed for Jialei's data).
            plt.scatter(x,
                        y,
                        color=color,
                        label=re.sub(r'[A-Za-z]', "", fl.split("-")[1]),
                        marker="*")
        elif (fun == 6 or fun == 5):
            # Integrated forms: draw bars of width dt centred in each
            # interval, with the counts divided by dt.
            xbar = np.asarray(x) + (dt / 2)
            ybar = np.asarray(y) / dt
            plt.bar(xbar, ybar, color=color, width=dt, alpha=0.5)
        ylist.append(y)
        # Read the fit result: only line ``num`` of the file is used.
        f = open(fitingpath + fl)
        alllines = f.readlines()
        eachdata = alllines[num].split()
        xfit = np.linspace(x[0], x[-1] + dt, 1000)
        yfit = 0
        yfitspot = 0
        flmain = ""
        if (fun == 1):
            s1 = float(eachdata[0])
            s2 = float(eachdata[1])
            s3 = float(eachdata[2])
            s4 = float(eachdata[3])
            s5 = float(eachdata[4])
            rs = float(eachdata[5])
            paras.append([s1, s2, s3, s4, s5, rs])
            col_labels = ["s1", "s2", "s3", "s4", "s5", "${R^2}$"]
            yfit = s1 * ((s2 + (xfit / s3))**(-s4)) + s5
            yfitspot = s1 * ((s2 + (np.asarray(x) / s3))**(-s4)) + s5
        elif (fun == 2):
            # Exponential: s1 * exp(-x / s2) + s3
            s1 = float(eachdata[0])
            s2 = float(eachdata[1])
            s3 = float(eachdata[2])
            r2 = float(eachdata[3])
            yfit = s1 * (np.exp(-(xfit / s2))) + s3
            yfitspot = s1 * (np.exp(-(np.asarray(x) / s2))) + s3
            flmain = re.sub(r'[A-Za-z]', "", fl.split("-")[1])
            flmain2 = flmain + "指数拟合"
            paras.append([flmain, s1, s2, s3, r2])
        elif (fun == 3):
            # Parameters are recorded but no curve is computed.
            s1 = float(eachdata[0])
            s2 = float(eachdata[1])
            s3 = float(eachdata[2])
            s4 = float(eachdata[3])
            s5 = float(eachdata[4])
            r2 = float(eachdata[5])
            paras.append([s1, s2, s3, s4, s5, r2])
            yfit = 0
            yfitspot = 0
        elif (fun == 4):
            # Power-law form: s1 * (s2 + x / s3) ** (-s4) + s5
            s1 = float(eachdata[0])
            s2 = float(eachdata[1])
            s3 = float(eachdata[2])
            s4 = float(eachdata[3])
            s5 = float(eachdata[4])
            r2 = float(eachdata[5])
            paras.append([s1, s2, s3, s4, s5, r2])
            yfit = s1 * ((s2 + (xfit / s3))**(-s4)) + s5
            yfitspot = s1 * ((s2 + (np.asarray(x) / s3))**(-s4)) + s5
            flmain = re.sub(r'[A-Za-z]', "", fl.split("-")[1])  # naming of Prof. Zhang's fits
            # The third-from-last field of the result line is appended to
            # the label as the goodness of fit.
            flmain = flmain + "双曲线拟合" + "(优度:" + eachdata[-3] + ")"
        elif (fun == 5):
            # Exponential, integrated over a window of length TimeSpan
            # (field 5 of the result line) for the per-point values.
            print(eachdata)
            s1 = float(eachdata[0])
            s2 = float(eachdata[1])
            s3 = float(eachdata[2])
            r2 = float(eachdata[3])
            TimeSpan = float(eachdata[5])
            print(TimeSpan)
            yfit = s1 * np.exp(-(xfit / s2)) + s3
            temp1spot = np.exp(-np.asarray(x) / s2)
            temp2spot = np.exp(-(np.asarray(x) + TimeSpan) / s2)
            yfitspot = s1 * s2 * (temp1spot - temp2spot) + s3 * TimeSpan
            print(yfitspot)
            flmain = re.sub(r'[A-Za-z]', "", fl.split("-")[1])
            paras.append([flmain, s1, s2, s3, r2])
            flmain2 = flmain + "指数积分拟合"
        elif (fun == 6):
            # Power-law form, integrated over a window of length TimeSpan
            # (field 7 of the result line) for the per-point values.  The
            # curve range is extended by the spacing of the first two points
            # instead of dt.
            xfit = np.linspace(x[0], x[-1] + (x[1] - x[0]), 1000)
            s1 = float(eachdata[0])
            s2 = float(eachdata[1])
            s3 = float(eachdata[2])
            s4 = float(eachdata[3])
            s5 = float(eachdata[4])
            rs = float(eachdata[5])
            col_labels = ["测量点", "s1", "s2", "s3", "s4", "s5", "${R^2}$"]
            TimeSpan = float(eachdata[7])
            temp1spot = (1 / (1 + np.asarray(x) / s3))**(s4 - 1)
            temp2spot = (1 / (1 + (np.asarray(x) + TimeSpan) / s3))**(s4 - 1)
            yfit = s1 * ((s2 + (xfit / s3))**(-s4)) + s5
            yfitspot = s1 * s3 * (1 / (s4 - 1)) * (temp1spot - temp2spot) + s5 * TimeSpan
            flmain = re.sub(r'[A-Za-z]', "", fl.split("-")[1])  # required for Prof. Zhang's data
            paras.append([flmain, s1, s2, s3, s4, s5, rs])
            flmain2 = flmain + "双曲线积分拟合"
        yfitlist.append(yfitspot)
        colornum += 1
        plt.plot(xfit, yfit, color=color, label=flmain2)
        row_labels.append(flmain)
    # Transpose so that each file becomes a column of the parameter table.
    paras = np.array(paras).T
    # getIndexes is defined outside this block; it receives the fitted and
    # the measured values of each file and its result is printed.
    for i in range(len(ylist)):
        print(getIndexes(yfitlist[i], ylist[i]))
    plt.title(title)
    plt.xlabel("time/ms", size=12)
    plt.ylabel("cps", size=12)
    font1 = {'size': 10}
    plt.legend(prop=font1)
    # NOTE(review): this second call replaces the legend created just above,
    # so the font size of 10 presumably has no effect.
    plt.legend()
    # Lower subplot: an axes without ticks or visible spines that only
    # carries the parameter table.
    plt.subplot(2, 1, 2, frameon=True, xticks=[], yticks=[])
    plt.gca().spines['right'].set_color('none')
    plt.gca().spines['top'].set_color('none')
    plt.gca().spines['bottom'].set_color('none')
    plt.gca().spines['left'].set_color('none')
    the_table = plt.table(cellText=paras,
                          colWidths=[0.12] * len(paras),
                          fontsize=5,
                          loc='center',
                          cellLoc='center')
    plt.title('参数列表')
    the_table.set_fontsize(20)
    the_table.scale(2.5, 2.58)
    plt.show()
np.argmin(arrayCTCFdistance[:, uppermm]) / np.size(arrayCTCFMissmatchGlobal, 0), 2) offtarget_data = np.vstack( (arrayprofileMissmatch[uppermm], arrayexonsMissmatch[uppermm], arrayintronsMissmatch[uppermm], arraypromotersMissmatch[uppermm], arrayDNAseMissmatch[uppermm], arrayCTCFMissmatch[uppermm])) distance_data = np.vstack( (general_distance, exons_distance, introns_distance, promoters_distance, dnase_distance, ctcf_distance)) table_data = np.concatenate((distance_data, offtarget_data), axis=1) plt.subplot(2, 2, 2) table = plt.table(cellText=table_data, rowLabels=rows, colLabels=columns, loc='center', colWidths=[0.35 for x in columns]) table.auto_set_font_size(False) table.set_fontsize(18) table.scale(1, 3) plt.axis('off') datacount = arrayguidesExtendedProfile[missmatch*7] / \ (max(arrayguidesExtendedProfile[missmatch*7])) data = np.array(datacount, dtype=float) data = np.around(data, decimals=1) data.shape = (1, len(datacount)) string = guide[0:20] strArray = np.array([list(string)])
print("epoch:", epoch, "sentence: %s/%s" % (j, len(sentences)), "loss", loss) j += 1 print("Elapsed time training:", datetime.now() - t0) plt.plot(losses) avg_bigram_loss = np.mean(bigram_losses) print("avg_bigram_loss:", avg_bigram_loss) plt.axhline(y=avg_bigram_loss, color='r', linestyle='-') def smoothed_loss(x, decay=0.99): y = np.zeros(len(x)) last = 0 for t in range(len(x)): z = decay * last + (1 - decay) * x[t] y[t] = z / (1 - decay**(t + 1)) last = z return y plt.plot(smoothed_loss(losses)) plt.show() plt.subplot(1, 2, 1) plt.table("Neural Network Model") plt.imshow(np.tan(W1).dot(W2)) plt.subplot(1, 2, 2) plt.title("Bigram probs") plt.imshow(bigram_probs) plt.show()
# Draw a thin gray guide line for every non-zero temperature / time value,
# skipping a value that merely repeats the one directly before it.
prevTemprature = None
for temprature in tempratureList:
    if prevTemprature != temprature and temprature != 0:
        plt.axhline(temprature, color='gray', linewidth=0.5)
    # Remember the value; without this update the duplicate check above
    # compared against None forever and never skipped anything.
    prevTemprature = temprature

prevTime = None
for t in timeList:
    if prevTime != t and t != 0:
        plt.axvline(t, color='gray', linewidth=0.5)
    prevTime = t

columns = ('Rate', 'GoalTemprature', 'HoldTime')
cell_text = []
for line in output:
    row_text = [str(line[0]), str(line[1])]
    # Hold times below one hour (but not zero) are shown in minutes.
    if line[2] < 1 and line[2] != 0:
        minutes = line[2] * 60
        row_text.append(str(minutes) + " [min]")
    else:
        row_text.append(str(line[2]) + " [h]")
    cell_text.append(row_text)

# Add a table at the bottom of the axes; bbox is [left, bottom, width,
# height] in axes coordinates, so the table sits below the plot.
the_table = plt.table(cellText=cell_text,
                      colLabels=columns,
                      loc='bottom',
                      bbox=[0, -0.6, 0.7, 0.4])
plt.subplots_adjust(left=0.2, bottom=0.4)
plt.show()
def cap_f_bar(nodes,fig_format,style,title_font,figsize,directory,cap_f_inp,colors,names,kind,table_font,v_round):
    """Plot capacity factors per node as bar charts or as one summary table.

    Args:
        nodes: Column keys of ``cap_f_inp`` to plot; also used as the column
            labels of the table.
        fig_format: File extension of the saved figures (without the dot).
        style: Style name; passed through ``style_check`` and then to
            ``plt.style.use``.
        title_font: Font size of the titles.
        figsize: Figure size.
        directory: Directory the figures are saved to.
        cap_f_inp: Capacity factors; presumably a pandas DataFrame (it is
            copied, re-indexed, rounded and indexed by node).  It is not
            modified.
        colors: Colours, indexable by the index of ``cap_f_inp``.
        names: Display names, indexable by the index of ``cap_f_inp`` and by
            node.
        kind: ``'bar'`` for one bar chart per node (saved and shown), or
            ``'table'`` for a single table figure (saved only).
        table_font: Font size of the table.
        v_round: Number of decimals the values are rounded to.

    Raises:
        ValueError: If ``kind`` is neither ``'bar'`` nor ``'table'``.
    """
    import os

    import matplotlib.pyplot as plt
    from calliope_graph.graphs import style_check

    cap_f = cap_f_inp.copy()
    style = style_check(style)
    plt.style.use(style)

    # Look the colours up with the original index before it is replaced by
    # the display names.
    colors = colors[cap_f.index]
    cap_f.index = names[cap_f.index]
    cap_f = cap_f.round(v_round)

    if kind == 'bar':
        for i in nodes:
            cap_f[i].plot(kind='bar', stacked=True, color=colors,
                          figsize=figsize, legend=False)
            plt.title('{} capacity factor'.format(names[i]),
                      fontsize=title_font)
            # os.path.join replaces the hard-coded backslash, which was an
            # invalid escape sequence and only a path separator on Windows.
            plt.savefig(os.path.join(directory,
                                     '{}capacity_factor.{}'.format(i, fig_format)),
                        bbox_inches='tight', dpi=150)
            plt.show()

    elif kind == 'table':
        fig, (ax) = plt.subplots(1, figsize=figsize)
        table = plt.table(cellText=cap_f.values,
                          rowColours=colors,
                          rowLabels=cap_f.index,
                          colLabels=nodes,
                          loc='upper center',
                          rowLoc='center',
                          colLoc='center',
                          cellLoc='center')
        table.set_fontsize(table_font)
        table.scale(1, 2)
        # Remove the frame and both axes so that only the table remains.
        plt.box(on=None)
        ax = plt.gca()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        plt.title('Capacity Factor', fontsize=title_font)
        plt.savefig(os.path.join(directory,
                                 'system_capacity_factor.{}'.format(fig_format)),
                    bbox_inches='tight', dpi=150)

    else:
        raise ValueError('/kind/ should be one of the followings: \n 1. /table/ \n 2. /bar/')
fig.add_subplot(2, 2, i) x = np.arange(3) + 0.4 plt.bar(x, [ precip['wsl_sum'], precip['interpolated_sum'], precip['combiprecip_sum'] ], align='center', width=0.4) plt.xlim([0, 2.4 + 0.4]) labels = ['LWF', 'Interpolated', 'Combiprecip'] plt.xticks(x, labels) plt.title(seasonlabels[i - 1]) plt.ylabel('summed precipitation [mm]') # Add table with values below plt.table(cellText=[['%.1f' % precip['wsl_sum'],'%.1f' % precip['interpolated_sum'],'%.1f' % precip['combiprecip_sum']]],\ bbox = [0.0,-0.12, 1.0, 0.05],cellLoc='center',rowLoc='center',fontsize=20) # Save figure if season is full if i == 4: plt.suptitle(nowdate.strftime('%Y'), fontsize=40) saveas = '\precip_statistics_' + treenetstation + '_seasonalsum_' plt.savefig(figpath + saveas + nowdate.strftime('%Y') + '.png', bbox_inches='tight') # Increase subplot index i += 1 #--------------------------------------------------------- # Create plots of yearly sums #--------------------------------------------------------- # Find latest starting date
cell_text = [] for row in range(n_rows): plt.plot(index, data[row], bar_width, color=colors[row]) y_offset = y_offset + data[row] cell_text.append(['%1.1f' % (x / 1000.0) for x in y_offset]) # Reverse colors and text labels to display table contents with # color. colors = colors[::-1] cell_text.reverse() # Add a table at the bottom the_table = plt.table(cellText=cell_text, rowLabels=rows, rowColours=colors, colLabels=columns, loc='bottom') # make space for the table: plt.subplots_adjust(left=0.2, bottom=0.2) plt.ylabel("Price in Rs.{0}'s".format(value_increment)) plt.yticks(values * value_increment, ['%d' % val for val in values]) plt.xticks([]) plt.title('Cost price increase') # plt.show()-display graph # Create image. plt.savefig ignores figure edge and face color. fig = plt.gcf() plt.savefig('pyplot-table-original.png', bbox_inches='tight',
def _format_p_value(p):
    """Format a p-value with four decimals, flooring small values at '<0.0001'."""
    if p < 0.0001:
        return '<0.0001'
    return '%0.4f' % (p)


def _compare_samples(alg1, alg2, mn):
    """Compare two samples of one metric and return the formatted p-value.

    Both samples are checked with ``st.shapiro``. If either has a normality
    p-value below 0.05, ``st.mannwhitneyu`` is used; otherwise
    ``st.ttest_ind`` with ``equal_var=False``. Diagnostic lines are printed
    along the way. ``mn`` is only used for that output.
    """
    # Step 1. Check normality
    _, norm_p_1 = st.shapiro(alg1)
    _, norm_p_2 = st.shapiro(alg2)
    mean_1 = np.mean(alg1)
    mean_2 = np.mean(alg2)
    print(mn)

    if (norm_p_1 < 0.05) or (norm_p_2 < 0.05):
        # Not normal, use mann-whitney U statistic
        stat, p = st.mannwhitneyu(alg1, alg2)
        print('NON-NORMAL - mean_1: %0.5f, mean_2: %0.4f, test_stat: %0.4f, p-val: %0.4f'
              % (mean_1, mean_2, stat, p))
        return _format_p_value(p)

    # Both normal: order the samples so the test statistic is computed as
    # (larger mean) vs. (smaller mean).
    if mean_1 > mean_2:
        big, small = alg1, alg2
    else:
        big, small = alg2, alg1
    stat, p = st.ttest_ind(big, small, equal_var=False)
    print('NORMAL - mean_1: %0.4f, mean_2: %0.4f, test_stat: %0.4f, p-val: %0.40f'
          % (np.mean(big), np.mean(small), stat, p))

    # Diagnostic only: a hand-computed one-sided p-value. It is printed but
    # the table uses the p-value returned by ttest_ind.
    # NOTE(review): np.var defaults to ddof=0 -- confirm that is intended for
    # this degrees-of-freedom estimate.
    s1sq = np.var(big)
    n1 = len(big)
    v1 = n1 - 1
    s2sq = np.var(small)
    n2 = len(small)
    v2 = n2 - 1
    my_dof = ((s1sq/n1 + s2sq/n2)**2)/((s1sq**2)/((n1**2)*v1) + (s2sq**2)/((n2**2)*v2))
    man_p_val = 1 - st.t.cdf(stat, my_dof)
    print('Manually calculated p-val: %0.10f' % (man_p_val))
    return _format_p_value(p)


def plot_alg(df, data_df, mk, mn, colors, ylims, p_val_dict, mode):
    """Plot a metric against prevalence per algorithm, with a table below.

    Each algorithm in ``df['alg']`` is drawn as a scatter series with error
    bars. The table under the axes has one row per algorithm
    ("value (low - high)") and one row of p-values per entry of
    ``p_val_dict``. The figure is shown with ``plt.show()``.

    Parameters
    ----------
    df : DataFrame
        Summary data with columns ``'alg'``, ``'prevalence'``, ``mk``,
        ``mk + '_025'`` and ``mk + '_975'``. The last two are used as the
        lower/upper error-bar lengths.
    data_df : DataFrame
        Per-sample data with columns ``'alg'``, ``'prevalence'`` and ``mk``;
        used for the statistical tests.
    mk : str
        Metric Key (column name).
    mn : str
        Metric Name (used in labels, title and diagnostic output).
    colors : mapping
        Algorithm name -> plot colour.
    ylims : mapping
        Metric key -> y-limits handed to ``plt.ylim``.
    p_val_dict : mapping
        Table row label -> sequence whose first two items are the names of
        the algorithms to compare.
    mode : str
        ``'ALL'`` selects the taller figure.

    Raises
    ------
    ValueError
        If ``df`` does not contain exactly 3 or 5 distinct algorithms; only
        those two marker layouts are defined.
    """
    if mode != 'ALL':
        plt.figure(figsize=(12, 2))
    else:
        plt.figure(figsize=(12, 3))
    plt.grid(True)
    plt.xlabel('Prevalence (%)')
    plt.ylabel(mn + ' (%)')
    plt.title('Prevalence vs. ' + mn)
    frame1 = plt.gca()
    # The table columns below the axes act as the x axis.
    frame1.axes.get_xaxis().set_visible(False)
    frame1.axes.get_yaxis().set_visible(True)
    frame1.set_facecolor('white')

    # Horizontal offsets so markers of different algorithms do not overlap.
    algs = df['alg'].unique()
    if len(algs) == 3:
        pos = [-0.015, 0, 0.015]
    elif len(algs) == 5:
        pos = [-0.01, -0.005, 0, 0.005, 0.01]
    else:
        raise ValueError('plot_alg supports 3 or 5 algorithms, got %d' % len(algs))

    # These metrics are reported with one decimal, all others with two.
    if mk in ('sens', 'spec', 'per_det'):
        cell_format = '%0.1f (%0.1f - %0.1f)'
    else:
        cell_format = '%0.2f (%0.2f - %0.2f)'

    cell_text = []
    table_colors = []
    table_rows = []
    table_columns = ['0.1%', '0.2%', '0.3%', '0.4%', '0.5%']
    for c, alg in enumerate(algs):
        print(mn, alg, c, colors[alg])
        tdf = df.loc[df['alg'] == alg][['prevalence', mk, mk + '_025', mk + '_975']].sort_values(by=['prevalence'])
        plt.scatter(tdf['prevalence'] + pos[c], tdf[mk], color=colors[alg], marker='o', label=alg, s=20)
        plt.errorbar(tdf['prevalence'] + pos[c], tdf[mk],
                     yerr=[tdf[mk + '_025'], tdf[mk + '_975']],
                     ecolor=colors[alg], capsize=3, barsabove=True, ls='none', linewidth=1)
        plt.ylim(ylims[mk])
        table_colors.append(colors[alg])
        table_rows.append(alg)
        cell_text.append([cell_format % (x, y, z)
                          for (x, y, z) in zip(tdf[mk],
                                               tdf[mk] - tdf[mk + '_025'],
                                               tdf[mk] + tdf[mk + '_975'])])

    # The prevalence levels are taken from the last algorithm's rows.
    prevalences = tdf['prevalence'].unique()
    for key in p_val_dict.keys():
        pvd = p_val_dict[key]
        table_rows.append(key)
        table_colors.append('white')
        p_val_list = []
        for prev in prevalences:
            tdf2 = data_df.loc[(data_df['prevalence'] == prev) & (data_df['alg'].isin(pvd))]
            alg1 = tdf2.loc[tdf2['alg'] == pvd[0]][mk]
            alg2 = tdf2.loc[tdf2['alg'] == pvd[1]][mk]
            p_val_list.append(_compare_samples(alg1, alg2, mn))
        cell_text.append(p_val_list)

    ts_x = 0
    te_x = 1 - ts_x
    ts_y = -0.75
    te_y = 0.75
    the_table = plt.table(cellText=cell_text,
                          rowLabels=table_rows,
                          rowColours=table_colors,
                          colLabels=table_columns,
                          cellLoc='center',
                          bbox=[ts_x, ts_y, te_x, te_y],  # (left-x, bottom-y, length-x, length-y)
                          loc='bottom')
    the_table.auto_set_font_size(False)
    # Row 0 holds the column headers: make them bold.
    for (row, col), cell in the_table.get_celld().items():
        if row == 0:
            cell.set_text_props(fontproperties=FontProperties(weight='bold'))
    plt.show()
    return None
print("missing : ", full_path) # print("MSE : ", mse) # print("MSE mean", np.mean(mse)) cell_text_row.append("%.3f" % mean_squared_error(Y, results) + " // " + "%.3f" % r2_score(Y, results)) cell_text.append(cell_text_row) title = "{} trace-regression hyperparameter evaluation".format( modality) the_table = plt.table( cellText=cell_text, rowLabels=rows, colLabels=columns, loc="center", ) the_table.scale(4, 2.5) plt.draw() plt.title(title) plt.savefig( os.path.join( path, "regression_hyperparameters_{}.png".format(titles[i])), dpi=fig.dpi, bbox_inches="tight", pad_inches=0.5, ) i += 1
def validation(M,df_encoded,results,Z,method,min_K,max_K,automatic=None,pp=None,gap=None,Tp=None):
    """Score each candidate number of clusters and pick the best one.

    For every ``k`` in ``range(min_K, max_K)`` the tree ``Z`` is cut into
    ``k`` clusters. ``M`` bootstrap samples (3/4 of the rows, drawn without
    replacement) are re-clustered and compared with that partition through
    the external indices. The internal indices are computed once per ``k``
    on the original partition. The ``k`` that most often holds the best
    average index value is returned.

    Parameters
    ----------
    M : int
        Number of bootstrap samples.
    df_encoded : DataFrame
        Original data; needs an ``'id_patient'`` column.
        NOTE(review): rows are sampled by position but read with ``.loc``,
        which assumes a default 0..n-1 index -- confirm.
    results : DataFrame
        Pairwise scores with columns ``'patient1'``, ``'patient2'``, ``'score'``.
    Z
        Hierarchical clustering of the original data, passed to ``cut_tree``.
    method : str
        Linkage method for the bootstrap clusterings; also shown in titles.
    min_K, max_K : int
        Candidate cluster counts are ``range(min_K, max_K)`` (``max_K`` excluded).
    automatic : optional
        When equal to 0 or 1, summary figures are created and saved via ``pp``.
    pp : optional
        Object with a ``savefig(figure)`` method; required when figures are made.
    gap, Tp : optional
        Only used in the figure titles, formatted with ``%.2f``.

    Returns
    -------
    list
        ``[df_avgs, df_stds, final_k]``: per-k averages, per-k standard
        deviations, and the chosen number of clusters.
    """
    ##############################################################################
    # HOW MANY CLUSTERS?
    ###############################################################################
    # bootstrap method - sampling without replacement
    k_range = range(min_K, max_K)

    # dictionary to store all computed indexes for each number of clusters K=min_K,...max_K
    dicio_statistics = {k: {index: [] for index in indexes} for k in k_range}

    # partition of the original data for each k
    trees = defaultdict(dict)
    for k in k_range:
        c_assignments_original = cut_tree(Z, k)
        trees[k] = cluster_indices(c_assignments_original, df_encoded.index.tolist())

    # for each bootstrap sample
    for _ in range(M):
        # sampling rows of the original data
        idx = np.random.choice(len(df_encoded), int((3/4)*len(df_encoded)), replace=False)
        idx = np.sort(idx)
        # get all the possible combinations between the sampled patients
        patient_comb_bootstrap = list(itertools.combinations(df_encoded.loc[idx, 'id_patient'], 2))
        patient_comb_bootstrap = pd.DataFrame(patient_comb_bootstrap, columns=['patient1', 'patient2'])
        # extract the scores regarding the previous sampled combinations to be used in hierarchical clustering
        results_bootstrap = pd.merge(results, patient_comb_bootstrap, how='inner', on=['patient1', 'patient2'])
        # Hierarchical Clustering of the bootstrap sample
        Z_bootstrap = linkage(results_bootstrap['score'], method)

        # for each number of clusters k=min_K,...,max_K
        for k, partition in trees.items():
            c_assignments_bootstrap = cut_tree(Z_bootstrap, k)
            # list of clusters for the clustering result with the bootstrap sample
            partition_bootstrap = cluster_indices(c_assignments_bootstrap, idx)
            # compute the external indexes between the two partitions
            computed_indexes = clustereval.calculate_external(partition, partition_bootstrap)
            for pos, index in enumerate(external_indexes):
                dicio_statistics[k][index].append(computed_indexes[pos])

    # internal indexes: computed once per k on the original partition
    for k, partition in trees.items():
        calc_idx = clustereval.calculate_internal(results[['patient1', 'patient2', 'score']],
                                                  partition, k, trees[max_K - 1])
        for index in internal_indexes:
            dicio_statistics[k][index].append(calc_idx[index])

    ###########################################################################
    # DECISION ON THE NUMBER OF CLUSTERS
    # The correct number of clusters is the k that yield most maximum average values of
    # clustering indices.
    # Also the k found before needs to have a low value of standard deviation - it has to
    # be the minimum between all k's or a value that is somehow still low compared to others
    ###########################################################################
    col = indexes.copy()
    col.extend(['k', 'k_score_avg'])
    # dataframe that stores the clustering indices averages for each k
    df_avgs = pd.DataFrame(index=k_range, columns=col, dtype='float')
    # dataframe that stores the indices standard deviations for each k
    df_stds = pd.DataFrame(index=k_range, columns=col, dtype='float')

    # computing the means and standard deviations
    # All writes use single-step .loc[row, column]: chained df.loc[k][col] = v
    # is not guaranteed to update the frame.
    for k in k_range:
        df_avgs.loc[k, 'k'] = k
        df_stds.loc[k, 'k'] = k
        for index in indexes:
            samples = dicio_statistics[k][index]
            if index in internal_indexes:
                # a single value per k: stored as-is in both frames
                df_avgs.loc[k, index] = samples[0]
                df_stds.loc[k, index] = samples[0]
            else:
                df_avgs.loc[k, index] = mean(samples)
                df_stds.loc[k, index] = stdev(samples)
        df_avgs.loc[k, 'k_score_avg'] = 0
        # The former chained write to a 'k_score_std' entry of df_stds never
        # reached the frame (no such column exists), so it is omitted and the
        # returned df_stds keeps its columns.

    # weights given to each clustering index (all equal)
    weights = {index: 1/len(indexes) for index in indexes}

    # find the best value for each clustering index and locate in which k it happens;
    # the score of a k is the sum of the weights of the indices it is best for
    # NOTE(review): the slice skips the first two and the last index column
    # (plus 'k' and 'k_score_avg') -- confirm this selection is intended.
    analyzed_columns = df_avgs.columns[2:-3]
    for column in analyzed_columns:
        if column in min_indexes:
            best_k = df_avgs[column].idxmin()
        else:
            best_k = df_avgs[column].idxmax()
        df_avgs.loc[best_k, 'k_score_avg'] = df_avgs.loc[best_k, 'k_score_avg'] + weights[column]

    # final number of clusters chosen by analysing df_avgs
    final_k = df_avgs['k_score_avg'].idxmax()

    if automatic == 0 or automatic == 1:
        n_external = len(external_indexes)

        # table of the external indices averages
        fig1 = plt.figure(figsize=(10, 5))
        ax = plt.gca()
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.axis('tight')
        ax.axis('off')
        colLabels1 = external_indexes.copy()
        colLabels1.append('k')
        # position -2 is the 'k' column
        external_positions = list(range(n_external)) + [-2]
        cell_text1 = [df_avgs.iloc[row, external_positions].round(decimals=3)
                      for row in range(len(df_avgs))]
        plt.title('Average values of eleven external indices \n gap: %.2f, Tp: %.2f, %s link' % (gap, Tp, method))
        the_table = plt.table(cellText=cell_text1, colLabels=colLabels1, loc='center', cellLoc='center')
        fig1.text(0.1, 0.01, "R = Rand, AR = Adjusted Rand, FM = Fowlkes and Mallows, J = Jaccard, AW = Adjusted Wallace, "
                  "VD = Van Dongen, H = Huberts, H' = Huberts Normalized, F = F-Measure, "
                  "VI = Variation of information, MS = Minkowski", fontsize=5)
        pp.savefig(fig1)

        # table of the internal indices averages
        fig2 = plt.figure(3, figsize=(10, 5))
        ax = plt.gca()
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.axis('tight')
        ax.axis('off')
        colLabels2 = internal_indexes.copy()
        colLabels2.append('k')
        internal_positions = list(range(n_external, len(indexes))) + [-2]
        cell_text2 = [df_avgs.iloc[row, internal_positions].round(decimals=3)
                      for row in range(len(df_avgs))]
        plt.title('Average values of six internal indices \n gap: %.2f, Tp: %.2f, %s link' % (gap, Tp, method))
        plt.table(cellText=cell_text2, colLabels=colLabels2, loc='center', cellLoc='center', fontsize=20)
        pp.savefig(fig2)

        # bar chart of the standard deviation of the Adjusted Rand index
        fig3 = plt.figure(4)
        df_stds.loc[:, 'AR'].plot.bar(figsize=(15, 8), color='forestgreen')
        plt.title('Standard deviation of Adjusted Rand versus number of clusters \n gap: %.2f, Tp: %.2f, %s link' % (gap, Tp, method), fontsize=25)
        plt.xlabel('Number of clusters', labelpad=20, fontsize=15)
        plt.ylabel('Standard deviation', labelpad=10, fontsize=15)
        plt.xticks(size=20)
        plt.yticks(size=20)
        pp.savefig(fig3)

    return [df_avgs, df_stds, final_k]