def test_get_color_pool(self):
    """get_color_pool yields 27 colors, as a new list object on each call."""
    first_pool = get_color_pool()
    self.assertEqual(len(first_pool), 27)

    # A second call must produce equal contents but must not hand back the
    # very same object as the first call.
    second_pool = get_color_pool()
    self.assertFloatEqual(first_pool, second_pool)
    self.assertFalse(first_pool is second_pool)
def _collate_cluster_pcoa_plot_data(coords_f, map_f, category):
    """Group PCoA points by a mapping file category and assign colors.

    Arguments:
        coords_f - coordinates input handed to parse_coords
        map_f - mapping file input handed to parse_mapping_file
        category - mapping file field used to group the sample IDs

    Returns a list of (xs, ys, color, state) tuples, one per distinct group
    returned by group_by_field, in sorted group order. xs and ys hold the
    first and second coordinate of every sample ID in that group.

    Raises ValueError if there are more groups than colors available from
    get_color_pool.
    """
    parsed_coords = parse_coords(coords_f)
    # Element 0 supplies the lookup keys (used below as sample IDs) and
    # element 1 the matching coordinate rows.
    coords_by_sid = dict(zip(parsed_coords[0], parsed_coords[1]))

    parsed_map = parse_mapping_file(map_f)
    # group_by_field is given a single table: element 1 of the parsed
    # mapping file first, followed by the rows in element 0.
    map_table = [parsed_map[1]] + list(parsed_map[0])
    sids_by_state = group_by_field(map_table, category)

    states = sorted(sids_by_state.keys())
    colors = get_color_pool()
    num_states = len(states)

    if num_states > len(colors):
        raise ValueError("Not enough colors to uniquely color sample groups.")

    collated = []
    for state_idx in range(num_states):
        state = states[state_idx]
        points = [coords_by_sid[sid] for sid in sids_by_state[state]]
        first_axis = [point[0] for point in points]
        second_axis = [point[1] for point in points]
        collated.append((first_axis, second_axis, colors[state_idx], state))

    return collated
def create_simulated_data_plots(analysis_type, in_dir, workflow):
    """Create plots of sample size vs effect size/p-val for each dissim.

    Plots will be placed directly under in_dir and will be named according to
    the following convention:

    <study>_<category>_<depth>_<metric>.pdf

    A PNG with the same base name (100 dpi) is saved next to each PDF.

    Arguments:
        analysis_type - 'gradient' or 'cluster'; selects the legend placement
            and is forwarded to plot_pcoa
        in_dir - root directory holding the per-study results; also the
            directory the figures are written to
        workflow - mapping of study name to a per-study settings mapping. The
            keys read here are 'num_sim_data_trials', 'methods',
            'pcoa_dissim', 'depths', 'categories', 'metrics', 'sample_sizes'
            and 'plot_dissim'

    Results are read from:

    <in_dir>/<study>/<depth>/simulated/<category>/<trial>/<sample size>/
        <dissim>/<metric>/<method.DirectoryName>/<method.ResultsName>_results.txt

    Side effects: Best() and MantelCorrelogram() are removed in place from
    each study's 'methods' list, so the caller's workflow is modified.

    Raises ValueError if the legend has to be drawn and analysis_type is
    neither 'gradient' nor 'cluster'.
    """
    for study in workflow:
        study_cfg = workflow[study]
        study_dir = join(in_dir, study)

        num_trials = study_cfg['num_sim_data_trials']

        # This is the workflow's own list (not a copy), so the removals below
        # are visible to the caller and to anything else reading the workflow.
        methods = study_cfg['methods']
        if Best() in methods:
            methods.remove(Best())
        if MantelCorrelogram() in methods:
            methods.remove(MantelCorrelogram())

        num_methods = len(methods)
        num_rows = max(num_methods, len(study_cfg['pcoa_dissim']) + 1)
        # test stat, p-val, legend/PCoA.
        num_cols = 3

        for depth in study_cfg['depths']:
            depth_dir = join(study_dir, '%d' % depth[0])
            data_type_dir = join(depth_dir, 'simulated')

            for category in study_cfg['categories']:
                category_dir = join(data_type_dir, category[0])

                # metric -> Figure
                figs = {}
                for metric in study_cfg['metrics']:
                    figs[metric[0]] = figure(num=None, figsize=(20, 20),
                                             facecolor='w', edgecolor='k')

                for method_idx, method in enumerate(methods):
                    # metric ->
                    #     dissim -> {
                    #         'sample_sizes': list,
                    #         'effect_sizes': list of lists, one for each size,
                    #         'p_vals' -> list of lists, one for each size
                    #     }
                    plots_data = defaultdict(
                        lambda: defaultdict(lambda: defaultdict(list)))

                    for trial_num in range(num_trials):
                        trial_num_dir = join(category_dir, '%d' % trial_num)

                        for samp_size in study_cfg['sample_sizes']:
                            samp_size_dir = join(trial_num_dir,
                                                 '%d' % samp_size)

                            for d in study_cfg['plot_dissim']:
                                dissim_dir = join(samp_size_dir, repr(d))

                                for metric in study_cfg['metrics']:
                                    metric_dir = join(dissim_dir, metric[0])
                                    method_dir = join(metric_dir,
                                                      method.DirectoryName)
                                    results_fp = join(
                                        method_dir,
                                        '%s_results.txt' % method.ResultsName)

                                    # Close the handle promptly: this runs
                                    # once per trial/size/dissim/metric, so
                                    # leaked handles add up quickly.
                                    with open(results_fp, 'U') as results_f:
                                        effect_size, p_val = method.parse(
                                            results_f)

                                    entry = plots_data[metric[0]][d]

                                    # The first time a sample size is seen,
                                    # open a new slot for its per-trial
                                    # values.
                                    if samp_size not in entry['sample_sizes']:
                                        entry['sample_sizes'].append(samp_size)
                                        entry['effect_sizes'].append([])
                                        entry['p_vals'].append([])

                                    samp_size_idx = \
                                        entry['sample_sizes'].index(samp_size)
                                    entry['effect_sizes'][samp_size_idx].append(
                                        effect_size)
                                    entry['p_vals'][samp_size_idx].append(
                                        p_val)

                    for metric in study_cfg['metrics']:
                        fig = figs[metric[0]]
                        metric_plots_data = plots_data[metric[0]]

                        # plot_num is 1-based indexing.
                        plot_num = method_idx * num_cols + 1
                        ax1 = fig.add_subplot(num_rows, num_cols, plot_num)
                        ax2 = fig.add_subplot(num_rows, num_cols,
                                              plot_num + 1)

                        color_pool = get_color_pool()

                        legend_labels = []
                        legend_lines = []
                        for d, plot_data in sorted(metric_plots_data.items()):
                            (avg_effect_sizes, std_effect_sizes, avg_p_vals,
                             std_p_vals) = _compute_plot_data_statistics(
                                plot_data, num_trials)
                            color = color_pool.pop(0)

                            is_actual_data = d == 0.0

                            label = 'd=%r' % d
                            if is_actual_data:
                                label += ' (actual data)'

                            legend_labels.append(label)
                            legend_lines.append(Line2D([0, 1], [0, 0],
                                                       color=color,
                                                       linewidth=2))

                            # Make the actual data plot a bit thicker than
                            # the rest.
                            line_width = 3 if is_actual_data else 1

                            # Plot test statistics.
                            ax1.errorbar(plot_data['sample_sizes'],
                                         avg_effect_sizes,
                                         yerr=std_effect_sizes, color=color,
                                         label=label, linewidth=line_width,
                                         fmt='-')

                            # Plot p-values. The error bar lines are styled
                            # separately so that they are dashed as well.
                            _, _, barlinecols = ax2.errorbar(
                                plot_data['sample_sizes'], avg_p_vals,
                                yerr=std_p_vals, color=color, label=label,
                                linewidth=line_width, linestyle='--')
                            barlinecols[0].set_linestyles('dashed')

                        ax1.set_xscale('log', nonposx='clip', basex=2)
                        ax1.xaxis.set_major_formatter(FormatStrFormatter('%d'))
                        ax2.set_xscale('log', nonposx='clip', basex=2)
                        ax2.xaxis.set_major_formatter(FormatStrFormatter('%d'))
                        ax2.set_yscale('log', nonposy='clip')

                        x_label = 'Number of samples'
                        ax1.set_xlabel(x_label)
                        ax2.set_xlabel(x_label)
                        ax1.set_ylabel('%s (%s)' % (method.DisplayName,
                                                    method.StatDisplayName))
                        ax2.set_ylabel('p-value')

                        min_x = min(study_cfg['sample_sizes'])
                        max_x = max(study_cfg['sample_sizes'])
                        ax1.set_xlim(min_x - 0.5, max_x)
                        ax2.set_xlim(min_x - 0.5, max_x)

                        for ax_idx, ax in enumerate((ax1, ax2)):
                            panel_idx = method_idx * 2 + ax_idx
                            panel_label = get_panel_label(panel_idx)
                            xmin = ax.get_xlim()[0]
                            ymin, ymax = ax.get_ylim()
                            yrange = ymax - ymin

                            # Not sure why the math isn't working out for the
                            # p-value plots...
                            if ax is ax1:
                                factor = 0.05
                            else:
                                factor = 0.60

                            ax.text(xmin, ymax + (factor * yrange),
                                    '(%s)' % panel_label)

                        # The legend is drawn once per figure, in the third
                        # column of the first method's row.
                        if method_idx == 0:
                            ax3 = fig.add_subplot(num_rows, num_cols,
                                                  plot_num + 2,
                                                  frame_on=False)
                            ax3.get_xaxis().set_visible(False)
                            ax3.get_yaxis().set_visible(False)

                            start_panel_label = get_panel_label(0)
                            end_panel_label = \
                                get_panel_label(num_methods * 2 - 1)

                            if analysis_type == 'gradient':
                                loc = 'center'
                            elif analysis_type == 'cluster':
                                loc = 'center left'
                            else:
                                # Fail with a clear message rather than an
                                # unbound local further down.
                                raise ValueError(
                                    "Unrecognized analysis type %r. Must be "
                                    "'gradient' or 'cluster'." % analysis_type)

                            assert len(legend_lines) == \
                                len(study_cfg['plot_dissim'])
                            assert len(legend_labels) == \
                                len(study_cfg['plot_dissim'])

                            legend_title = (' Legend (Panels %s-%s)\nd = '
                                            '"noise" introduced to samples' % (
                                                start_panel_label,
                                                end_panel_label))
                            ax3.legend(legend_lines, legend_labels, ncol=1,
                                       title=legend_title, loc=loc,
                                       fancybox=True, shadow=True)

                for metric in study_cfg['metrics']:
                    fig = figs[metric[0]]

                    # Plot PCoA in last column of figure.
                    plot_pcoa(analysis_type, fig, category_dir, study_cfg,
                              category, metric, num_rows, num_cols,
                              num_methods)

                    fig.tight_layout(pad=5.0, w_pad=2.0, h_pad=2.0)
                    fig.savefig(join(in_dir, '%s_%s_%d_%s.pdf' % (
                        study, category[0], depth[0], metric[0])),
                        format='pdf')
                    fig.savefig(join(in_dir, '%s_%s_%d_%s.png' % (
                        study, category[0], depth[0], metric[0])),
                        format='png', dpi=100)