def create_mask(data, use_sig=True, p_value=0.1, fold_change_cutoff=1.5):
    """
    Split data into the three regions of a volcano plot.

    Layout of the regions (x axis: fold change, y axis: -log10 p-value).
    Region 0 holds the points that meet the significance criteria::

        #    0    #    1   #      0     #
        #         #        #            #
        #################################
        #         #        #            #
        #    2    #    2   #      2     #
        #         #        #            #
        #################################

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the p-value, fold-change and significance-flag columns.
    use_sig : bool
        If True, split purely on the significance-flag column and ignore
        ``p_value`` and ``fold_change_cutoff``.
    p_value : float
        p_value threshold (only used when ``use_sig`` is False)
    fold_change_cutoff : float
        fold change threshold (only used when ``use_sig`` is False)

    Returns
    -------
    tuple
        ``(sec_0, sec_1, sec_2)``. ``sec_0`` are the rows meeting the
        significance criteria, ``sec_1`` the rows passing the p-value
        threshold but not the fold-change threshold (``None`` when
        ``use_sig`` is True) and ``sec_2`` the remaining rows (flag is
        False, or p-value does not pass the threshold).
    """
    columns = (p_val, fold_change, flag)
    scaled = data.loc[:, columns].copy()

    # p-values are expressed as -log10(p)
    scaled[p_val] = -1 * np.log10(data[p_val])
    # fold changes go through the shared log2 normalization helper
    scaled = log2_normalize_df(scaled, column=fold_change)

    if use_sig:
        is_flagged = scaled[flag]
        return scaled[is_flagged], None, scaled[~is_flagged]

    # put both thresholds on the same scales as the transformed columns
    log_fc_cutoff = np.log2(fold_change_cutoff)
    log_p_cutoff = -1 * np.log10(p_value)

    passes_p = scaled[p_val] >= log_p_cutoff
    abs_fc = np.abs(scaled[fold_change])

    significant = scaled[passes_p & (abs_fc >= log_fc_cutoff)]
    small_change = scaled[passes_p & (abs_fc < log_fc_cutoff)]
    not_significant = scaled[scaled[p_val] < log_p_cutoff]
    return significant, small_change, not_significant
def create_histogram_measurements(self, exp_data_type, save_name=None,
                                  y_range=None, out_dir=None):
    """
    Plots a histogram of the fold-change values of one experiment type.

    Parameters
    ----------
    exp_data_type : str
        Which data to plot. Only rows whose experiment-method column
        equals this value are used.
    save_name : str, optional
        Name of figure. When given, the figure is handed to
        ``v_plot.save_plot``.
    y_range : array_like, optional
        Two values ``(low, high)``. Despite the name, these are applied as
        the limits of the x axis (the fold-change axis).
    out_dir : str, path, optional
        Path to location to save figure

    Returns
    -------
    matplotlib.figure.Figure or None
        The created figure, or ``None`` when the experiment-type check
        fails.
    """
    if not self._check_experiment_type_existence(exp_type=exp_data_type):
        return None

    data = self.data[self.data[exp_method] == exp_data_type].copy()
    data = data.dropna(subset=[p_val])
    # np.isfinite is False for NaN as well as +/-inf, so this single filter
    # also removes missing fold changes.
    data = data[np.isfinite(data[fold_change])]

    values = np.array(log2_normalize_df(data, fold_change)[fold_change])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.hist(values, 50, color='gray')
    if y_range is not None:
        # set limits on this axes explicitly instead of the pyplot
        # "current axes"
        ax.set_xlim(y_range[0], y_range[1])
    # Base 10 is the default for the log scale. The old ``basey`` keyword
    # was removed in Matplotlib 3.5 and raises TypeError there.
    ax.set_yscale('log')
    ax.set_xlabel('log$_2$ Fold Change', fontsize=16)
    ax.set_ylabel('Count', fontsize=16)

    if save_name is not None:
        v_plot.save_plot(fig, save_name, out_dir)
    return fig
def create_histogram_measurements(self, save_name=None, y_range=None,
                                  out_dir=None):
    """
    Plots a histogram of the fold-change values.

    Works on a copy of ``self``, which is assumed to behave like a
    pandas.DataFrame (``copy``, ``dropna`` and boolean indexing are used).

    Parameters
    ----------
    save_name : str, optional
        Name of figure. When given, the figure is handed to
        ``v_plot.save_plot``.
    y_range : array_like, optional
        Two values ``(low, high)``. Despite the name, these are applied as
        the limits of the x axis (the fold-change axis).
    out_dir : str, path, optional
        Path to location to save figure

    Returns
    -------
    matplotlib.figure.Figure
        The created figure.
    """
    data = self.copy()
    data = data.dropna(subset=[p_val])
    # np.isfinite is False for NaN as well as +/-inf, so this single filter
    # also removes missing fold changes.
    data = data[np.isfinite(data[fold_change])]

    values = np.array(log2_normalize_df(data, fold_change)[fold_change])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.hist(values, 50, color='gray')
    if y_range is not None:
        # set limits on this axes explicitly instead of the pyplot
        # "current axes"
        ax.set_xlim(y_range[0], y_range[1])
    # Base 10 is the default for the log scale. The old ``basey`` keyword
    # was removed in Matplotlib 3.5 and raises TypeError there.
    ax.set_yscale('log')
    ax.set_xlabel('log$_2$ Fold Change', fontsize=16)
    ax.set_ylabel('Count', fontsize=16)
    fig.tight_layout()

    if save_name is not None:
        v_plot.save_plot(fig, save_name, out_dir)
    return fig
def plot_histogram(self, save_name=None, y_range=None, out_dir=None):
    """
    Plots a histogram of the fold-change values.

    Works on a copy of ``self``, which is assumed to behave like a
    pandas.DataFrame (``copy``, ``dropna`` and boolean indexing are used).

    Parameters
    ----------
    save_name : str, optional
        Name of figure. When given, the figure is handed to
        ``v_plot.save_plot``.
    y_range : array_like, optional
        Two values ``(low, high)``. Despite the name, these are applied as
        the limits of the x axis (the fold-change axis).
    out_dir : str, path, optional
        Path to location to save figure

    Returns
    -------
    matplotlib.figure.Figure
        The created figure.
    """
    data = self.copy()
    data = data.dropna(subset=[p_val])
    # np.isfinite is False for NaN as well as +/-inf, so this single filter
    # also removes missing fold changes.
    data = data[np.isfinite(data[fold_change])]

    values = np.array(log2_normalize_df(data, fold_change)[fold_change])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.hist(values, 50, color='gray')
    if y_range is not None:
        # set limits on this axes explicitly instead of the pyplot
        # "current axes"
        ax.set_xlim(y_range[0], y_range[1])
    # Base 10 is the default for the log scale. The old ``basey`` keyword
    # was removed in Matplotlib 3.5 and raises TypeError there.
    ax.set_yscale('log')
    ax.set_xlabel('log$_2$ Fold Change', fontsize=16)
    ax.set_ylabel('Count', fontsize=16)
    fig.tight_layout()

    if save_name is not None:
        v_plot.save_plot(fig, save_name, out_dir)
    return fig
def plot_species(df, species_list=None, save_name='test', out_dir=None,
                 title=None, plot_type='plotly', image_format='pdf',
                 close_plots=False):
    """
    Plot fold change against sample for each species/label combination.

    Parameters
    ----------
    df : pandas.DataFrame
        magine formatted dataframe
    species_list : list, optional
        List of genes to be plotted. If None, every identifier in ``df`` is
        used.
    save_name : str
        Filename to be saved as
    out_dir : str, optional
        Path for output to be saved. Created if it does not exist.
    title : str, optional
        Title of plot, useful when list of genes corresponds to a GO term.
        Only passed on for ``plot_type='plotly'``.
    plot_type : str
        Use plotly to generate html output or matplotlib to generate pdf
    image_format : str
        pdf or png, only used if plot_type="matplotlib"
    close_plots : bool
        Close plot after making, use when creating lots of plots in
        parallel.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure when ``plot_type='matplotlib'`` and ``close_plots`` is
        False. ``None`` in every other case (plotly output, closed
        figure, or no sample points in ``df``).
    """
    from itertools import cycle

    ldf = df.copy()

    if out_dir is not None and not os.path.exists(out_dir):
        # makedirs also creates missing parent directories
        os.makedirs(out_dir)

    # gather x axis points
    x_points = sorted(ldf[sample_id].unique())
    if len(x_points) == 0:
        return None

    # ``np.float`` was only an alias of the builtin ``float`` and was removed
    # in NumPy 1.24; numpy float64 scalars are still instances of ``float``.
    if isinstance(x_points[0], float):
        # numeric sample ids keep their own value as x position
        x_point_dict = {point: point for point in x_points}
    else:
        # anything else is placed at its rank in the sorted order
        x_point_dict = {point: pos for pos, point in enumerate(x_points)}

    if species_list is not None:
        ldf = ldf.loc[ldf[identifier].isin(species_list)].copy()
    ldf = log2_normalize_df(ldf, column=fold_change)

    n_plots = len(ldf[identifier].unique())
    num_colors = len(ldf[label_col].unique())
    color_list = sns.color_palette("tab20", num_colors)

    if plot_type == 'matplotlib':
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_prop_cycle(plt.cycler('color', color_list))

    # One colour is consumed per (identifier, label) group. Cycling keeps the
    # plotly branch from raising StopIteration when a label occurs under
    # more than one identifier (more groups than unique labels).
    colors = cycle(color_list)
    traces = []
    names_list = []
    total_counter = 0

    for name, species_df in ldf.groupby(identifier):
        index_counter = 0
        for label_name, label_df in species_df.groupby(label_col):
            x = np.array(label_df[sample_id])
            if len(x) < 1:
                continue
            # read the same column that was normalized above
            y = np.array(label_df[fold_change])
            sig_flag = np.array(label_df[flag])

            # order all arrays by sample id
            order = np.argsort(x)
            x = x[order]
            y = y[order]
            s_flag = sig_flag[order]

            # x values with scaled values (only changes things if non-float
            # values are used for sample_id)
            x_index = np.array([x_point_dict[ind] for ind in x])
            index_counter += 1
            total_counter += 1

            # NOTE(review): ``len(s_flag) != 0`` is always true here because
            # empty groups were skipped above; a check for "any significant
            # point" may have been intended. Kept as-is because the counters
            # passed to ``_create_plotly`` depend on it.
            if plot_type == 'matplotlib':
                label = "\n".join(wrap(label_name, 40))
                line = ax.plot(x_index, y, '.-', label=label)
                if len(s_flag) != 0:
                    # mark significant points in the colour of their line
                    color = line[0].get_color()
                    ax.plot(x_index[s_flag], y[s_flag], '^', color=color)
            elif plot_type == 'plotly':
                c = next(colors)
                traces.append(
                    _ploty_graph(x_index, y, label_name, label_name, c)
                )
                if len(s_flag) != 0:
                    index_counter += 1
                    total_counter += 1
                    traces.append(
                        _ploty_graph(x_index[s_flag], y[s_flag], label_name,
                                     label_name, c, marker='x-open-dot')
                    )
        # number of traces added for this identifier
        names_list.append([name, index_counter])

    if plot_type == 'matplotlib':
        lgd = _format_mpl(ax, x_point_dict, x_points)
        if save_name is not None:
            tmp_savename = "{}.{}".format(save_name, image_format)
            if out_dir is not None:
                tmp_savename = os.path.join(out_dir, tmp_savename)
            # save this figure explicitly rather than pyplot's current one
            fig.savefig(tmp_savename, bbox_extra_artists=(lgd,),
                        bbox_inches='tight')
        if close_plots:
            plt.close(fig)
        else:
            return fig

    elif plot_type == 'plotly':
        fig = _create_plotly(total_counter, n_plots, names_list,
                             x_point_dict, title, x_points, traces)
        if save_name:
            _save_ploty_output(fig, out_dir, save_name)
        else:
            init_notebook_mode(connected=True)
            iplot(fig)
    return None