def volcano(self):
    """Draw the volcano plot of all the results stored in :attr:`df`.

    A :class:`VolcanoANOVA` instance is built from :attr:`df` and
    :attr:`settings`, stored in :attr:`handle_volcano`, and its
    ``volcano_plot_all`` method is called.

    x-value is sign(FEATURE_delta_MEAN_IC50) times FEATURE_IC50_effect_size
    y-value is the FDR correction
    """
    plotter = VolcanoANOVA(self.df, settings=self.settings)
    # Keep a handle on the plotter so that it can be reused afterwards
    self.handle_volcano = plotter
    plotter.volcano_plot_all()
def create_pictures(self):
    """Create and save the volcano plot restricted to :attr:`drug`.

    The figure is produced by :class:`VolcanoANOVA` and saved with
    ``savefig_and_js`` under the name ``images/volcano_<drug>`` using a
    10x10 inches size. The callback registered on the figure canvas is
    then disconnected and the mpld3 plugins are cleared (best effort).
    """
    v = VolcanoANOVA(self.df, settings=self.settings)
    v.volcano_plot_one_drug(self.drug)
    v.savefig_and_js('images/volcano_{}'.format(self.drug),
                     size_inches=(10, 10))

    # See https://github.com/CancerRxGene/gdsctools/issues/79
    v.current_fig.canvas.mpl_disconnect(v.cid)
    try:
        import mpld3
        mpld3.plugins.clear(v.current_fig)
    except Exception:
        # Best-effort clean-up: a missing mpld3 or a failure while clearing
        # the plugins is deliberately ignored. Unlike a bare ``except``,
        # KeyboardInterrupt and SystemExit are not swallowed.
        pass
def volcano(self, settings=None):
    """Draw the volcano plot of all the results stored in :attr:`df`.

    x-value is sign(FEATURE_delta_MEAN_IC50) times FEATURE_IC50_effect_size
    y-value is the FDR correction

    See the online documentation for details on gdsctools.readthedocs.io.

    :param settings: forwarded as is to :class:`VolcanoANOVA`.

    When :attr:`df` is empty, a message is printed and nothing is plotted.
    Otherwise the :class:`VolcanoANOVA` instance is kept in
    :attr:`handle_volcano`.
    """
    if len(self.df) > 0:
        plotter = VolcanoANOVA(self.df, settings=settings)
        self.handle_volcano = plotter
        plotter.volcano_plot_all()
    else:
        print("No data to plot")
def create_pictures(self):
    """Create and save the volcano plot restricted to :attr:`feature`.

    The figure is produced by :class:`VolcanoANOVA` and saved with
    ``savefig_and_js`` under the name ``images/volcano_<feature>``. The
    callback registered on the figure canvas is then disconnected and the
    mpld3 plugins are cleared (best effort).
    """
    v = VolcanoANOVA(self.df, settings=self.settings)
    v.volcano_plot_one_feature(self.feature)
    v.savefig_and_js('images/volcano_{}'.format(self.feature))

    # See https://github.com/CancerRxGene/gdsctools/issues/79
    v.current_fig.canvas.mpl_disconnect(v.cid)
    try:
        import mpld3
        mpld3.plugins.clear(v.current_fig)
    except Exception:
        # Best-effort clean-up: a missing mpld3 or a failure while clearing
        # the plugins is deliberately ignored. Unlike a bare ``except``,
        # KeyboardInterrupt and SystemExit are not swallowed.
        pass
def _create_report(self, onweb=True):
    """Fill :attr:`jinja` with the sections of the main report page.

    Besides the jinja entries (summary, volcano plot, MANOVA link, drug,
    feature and COSMIC tables, links to the input files), the method
    writes the companion files: feature/drug summaries and ``results.csv``
    in ``OUTPUT``, the IC50, genomic features, DRUG_DECODE and settings
    files in ``INPUT``, and the ``code/rerun.py`` script.

    :param bool onweb: accepted for interface compatibility; it is not
        used in this method.
    """
    # A summary table
    diag = self.report.diagnostics()
    # NOTE(review): this table is never used (the name is rebound below);
    # kept in case the HTMLTable constructor has side effects -- confirm.
    table = HTMLTable(diag, 'summary')
    lines = []
    for _, row in diag.iterrows():
        if len(row.text) == 0 and len(row.value) == 0:
            lines.append('----<br/>')
        else:
            lines.append(row.text + ": " + str(row.value) + "<br/>")
    self.jinja['summary'] = ''.join(lines)

    print('Creating volcano plots')
    # this can be pretty slow, so only a subset of the associations is
    # kept (1500 and 1500 are the sizes handed to the selector) to get
    # an idea of the distribution
    v = VolcanoANOVA(self.report.df, settings=self.settings)
    v.selector(v.df, 1500, 1500, inplace=True)
    v.volcano_plot_all()
    v.savefig_and_js("volcano_all_js")

    self.jinja['volcano'] = """
        <h3></h3>
        <a href="volcano_all_js.html">
            <img alt="volcano plot for all associations"
                src="volcano_all_js.png">
        </a>
        <br/>
        <p> A javascript version is available
            <a href="volcano_all_js.html">here</a> (
            or click on the image).</p>
    """

    # MANOVA link
    N = len(self.report.get_significant_set())
    self.jinja['manova'] = """
    There were %(N)s significant associations found.
    All significant associations have been gatherered
    in the following link: <br/><a href="manova.html">manova results</a>.
    """ % {'N': N}

    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    # drug summary
    # The list of drugs that could not be analysed is not computed;
    # the corresponding section is left empty.
    not_tested = ""
    self.jinja['drug_not_tested'] = not_tested

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = [x + "-" + get_name(x) for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    # Select the FDR column before averaging: only that column is needed
    # and averaging the whole frame fails on non-numeric columns with
    # recent pandas versions.
    try:
        df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
    except AttributeError:
        # old pandas versions without sort_values
        # note double brackets for python3.3
        df = groups[['ANOVA_FEATURE_FDR']].mean().sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    drug_groups = groups.groups
    df['Number of associations computed'] = [
        len(drug_groups[k]) for k in df.DRUG_ID]

    # number of significant associations (0 if the drug has none)
    groups = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(groups.get(drug, ())) for drug in df['DRUG_ID'].values]

    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID')
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))

    self.jinja['drug_table'] = table.to_html(escape=False,
                                             header=True, index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # old pandas versions without sort_values
        df.sort('FEATURE', inplace=True)

    groups = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(groups.get(feature, ()))
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE')
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False,
                                                header=True, index=False)

    # -------------------------------------- COSMIC table for completeness
    colnames = self.report.gdsc.features._special_names
    df = self.report.gdsc.features.df[colnames]

    # TODO
    # add other columns if possible e.g., GDSC1, GDSC2, TCGA

    df = df.reset_index()
    table = HTMLTable(df)
    url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
    table.add_href('COSMIC_ID', url=url, newtab=True)
    self.jinja['cosmic_table'] = table.to_html()

    # -------------------------------------- settings and INPUT files
    input_dir = self.directory + os.sep + 'INPUT'

    filename = 'ANOVA_input.csv'
    filename = os.sep.join([input_dir, filename])
    self.report.gdsc.ic50.to_csv(filename)
    # the link stored in the page is relative to the report directory
    filename = os.sep.join(['INPUT', 'ANOVA_input.csv'])
    self.jinja['ic50_file'] = filename

    # the genomic features, which may be the default version or
    # one provided by the user. It may have been changed
    gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
    self.report.gdsc.features.to_csv(gf_filename)
    html = """Saved <a href="INPUT/genomic_features.csv">Genomic
              Features</a> file<br/> (possibly the default
              version)."""
    self.jinja['gf_file'] = html

    # Always save DRUG_DECODE file even if empty
    # It may be interpreted in other pipeline or for reproducibility
    output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
    self.report.drug_decode.to_csv(output_filename)
    html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
    if len(self.report.drug_decode) == 0:
        html += 'Note that DRUG_DECODE file was not provided (empty?).'
    self.jinja['drug_decode'] = html

    # Save settings as json file
    filename = os.sep.join([input_dir, 'settings.json'])
    self.settings.to_json(filename)
    filename = os.path.basename(filename)
    self.jinja['settings'] = \
        """Get the settings as a <a href="INPUT/%s">
        json file</a>.""" % filename

    # Save all Results dataframe
    filename = os.sep.join([self.settings.directory, 'OUTPUT',
                            'results.csv'])
    ANOVAResults(self.report.df).to_csv(filename)

    # Script that re-runs the analysis from the saved INPUT files
    code = """from gdsctools import *
import os

def getfile(filename, where='../INPUT'):
    return os.sep.join([where, filename])

# reback the IC50 and genomic features matrices
gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),
        getfile('DRUG_DECODE.csv'))
gdsc.settings.from_json(getfile('settings.json'))
gdsc.init()

# Analyse the data
results = gdsc.anova_all()

# Create the HTML report
r = ANOVAReport(gdsc, results)
r.create_html_pages(onweb=False)"""
    code = code % {
        'ic50': 'ANOVA_input.csv',
        'gf_filename': 'genomic_features.csv'}

    filename = os.sep.join([self.settings.directory, 'code', 'rerun.py'])
    # the context manager closes the file even if the write fails
    with open(filename, 'w') as fh:
        fh.write(code)
def test_volcano_plot():
    """Smoke test of :class:`VolcanoANOVA`: constructors, selector, plots
    and the FDR/p-value helpers."""
    # Restrict the genomic features to their first 10 columns and rebuild
    # the ANOVA instance from that reduced matrix
    first_pass = ANOVA(ic50_test)
    kept_columns = first_pass.features.df.columns[:10]
    first_pass.features.df = first_pass.features.df[kept_columns]
    an = ANOVA(ic50_test, genomic_features=first_pass.features.df)

    results = an.anova_all()

    # both constructors: from the dataframe and from the results object
    v = VolcanoANOVA(results.df)
    v = VolcanoANOVA(results)

    # the selector method
    selection = v.selector(v.df)
    v.df = selection

    # do not save the figures
    v.settings.savefig = False

    # some of the plotting
    v.volcano_plot_all_drugs()
    v.volcano_plot_all_features()
    v.volcano_plot_all()

    # FDR <-> p-value helpers, with scalar and list inputs
    v._get_fdr_from_pvalue_interp(1e-10)
    v._get_pvalue_from_fdr(50)
    v._get_pvalue_from_fdr([50, 60])
class ANOVAResults(object):
    """Class to handle results of the ANOVA analysis

    The :class:`ANOVA` performs the regression and ANOVA analysis and
    returns an :class:`ANOVAResults` instance (e.g., when you call
    :meth:`gdsctools.anova.ANOVA.anova_all` method). The
    :class:`ANOVAResults` contains a dataframe with all results in
    :attr:`df`.

    The columns of the dataframe are defined as follows:

    =========================== ===============================================
    Column name                 Description
    =========================== ===============================================
    ASSOC_ID                    Alphanumerical identifier of the interaction
    FEATURE                     The CFE involved in the interaction, it can
                                be a mutated cancer driver gene (CG) [suffix
                                _mut], an aberrantly fused protein [suffix
                                fusion], a copy number altered chromosomal
                                region (RACS) [prefix gain for
                                amplifications or loss for deletions];
    DRUG_ID                     Numerical id of the drug involved in the
                                interaction;
    DRUG_TARGET                 Putative target of the drug involved in the
                                interaction;
    N_FEATURE_pos               Number of cell lines harbouring the CFE
                                indicated in column E and that have been
                                screened with the drug indicated in columns
                                F and G, therefore have been included in
                                the test;
    N_FEATURE_neg               Number of cell lines not harbouring the CFE
                                indicated in column E and that have been
                                screened with the drug indicated in columns
                                F and G, therefore have been included in
                                the test;
    FEATURE_pos_logIC50_MEAN    Average log IC50 of the population of cell
                                lines accounted in column i;
    FEATURE_neg_logIC50_MEAN    Average log IC50 of the population of cell
                                lines accounted in column j;
    FEATURE_delta_MEAN_IC50     Difference between the two average natural
                                log IC50 values in the previous two columns
                                (j - i). A negative value indicates an
                                interaction for sensitivity, whereas a
                                positive value indicates an interaction for
                                resistance;
    FEATURE_pos_IC50_sd         Log IC50 Standard deviation for the
                                population of cell lines accounted in
                                column i;
    FEATURE_neg_IC50_sd         Log IC50 Standard deviation for the
                                population of cell lines accounted in
                                column j;
    FEATURE_IC50_effect_size    Cohen's d, quantifying the effect size of
                                the interaction. A value >=0.5 indicates a
                                moderate effect size. A value >=1 indicates
                                a large effect size (i.e. difference in
                                mean log IC50 values greater than their
                                pooled standard deviations). A value >= 2
                                indicates a very large effect size (i.e.
                                difference in mean log IC50 is at least two
                                times their pooled standard deviation);
    FEATURE_pos_Glass_delta     Glass delta, quantifying the effect size of
                                the interaction as the ratio between the
                                difference of the mean log IC50 values and
                                the standard deviation of the log IC50
                                values of the population of cell lines
                                accounted in column i;
    FEATURE_neg_Glass_delta     Glass delta Same as above for the negative
                                set.
    ANOVA_FEATURE_pval          ANOVA test p-value quantifying the
                                interaction significance;
    ANOVA_TISSUE_pval           ANOVA test p-value quantifying the
                                significance of the interaction between
                                drug response and the tissue of origin of
                                the cell lines; for the cancer-specific
                                interactions this value is NA;
    ANOVA_MEDIA_pval            ANOVA test p-value quantifying the
                                significance of the interaction between
                                drug response and the screening medium of
                                the cell lines; for the cancer-specific
                                interactions this value is NA;
    ANOVA_MSI_pval              ANOVA test p-value quantifying the
                                significance of the interaction between
                                drug response and the micro-satellite
                                instability status of the cell lines; for
                                the cancer type with no micro-satellite
                                instable cell line samples this value is
                                NA;
    ANOVA_FEATURE_FDR           False discovery rate obtained by correcting
                                the p-values in column u, on an individual
                                analysis basis, for multiple hypothesis
                                testing with the q-value correction method
                                (Storey & Tibshirani, 2003)
    =========================== ===============================================

    Note that those column names are renamed internally (and if the data
    is saved in a new file).

    """
    # name of the column holding the drug identifiers
    _colname_drug_id = 'DRUG_ID'

    def __init__(self, filename=None):
        """.. rubric:: Constructor

        :param filename: the name of a CSV file with the official header
            (read with :meth:`read_csv`), another :class:`ANOVAResults`
            instance (any object exposing a ``df`` dataframe) or a
            dataframe. This parameter can also be set to None (default),
            in which case the results are empty and can be populated
            later.

        The input dataframe is copied; legacy column names are renamed
        and the index of :attr:`df` is reset to 0..N-1.
        """
        if isinstance(filename, str):
            self.read_csv(filename)
        elif filename is None:
            self._df = pd.DataFrame()
        else:
            try:
                # an ANOVAResults-like object exposing its dataframe
                self._df = filename.df.copy()
            except AttributeError:
                # otherwise, expected to be the dataframe itself
                self._df = filename.copy()
            assert isinstance(self._df, pd.core.frame.DataFrame), \
                "excepts a dataframe or filename"

        #: dictionary with the relevant column names and their expected types
        self.mapping = OrderedDict([
            ('ASSOC_ID', np.dtype('int64')),
            ('FEATURE', np.dtype('O')),
            ('DRUG_ID', np.dtype('O')),
            ('DRUG_NAME', np.dtype('O')),
            ('DRUG_TARGET', np.dtype('O')),
            ('N_FEATURE_neg', np.dtype('int64')),
            ('N_FEATURE_pos', np.dtype('int64')),
            ('FEATURE_pos_logIC50_MEAN', np.dtype('float64')),
            ('FEATURE_neg_logIC50_MEAN', np.dtype('float64')),
            ('FEATURE_delta_MEAN_IC50', np.dtype('float64')),
            ('FEATURE_IC50_effect_size', np.dtype('float64')),
            ('FEATURE_neg_Glass_delta', np.dtype('float64')),
            ('FEATURE_pos_Glass_delta', np.dtype('float64')),
            ('FEATURE_neg_IC50_sd', np.dtype('float64')),
            ('FEATURE_pos_IC50_sd', np.dtype('float64')),
            ('FEATURE_IC50_T_pval', np.dtype('float64')),
            ('ANOVA_FEATURE_pval', np.dtype('float64')),
            ('ANOVA_TISSUE_pval', np.dtype('float64')),
            ('ANOVA_MSI_pval', np.dtype('float64')),
            ('ANOVA_MEDIA_pval', np.dtype('float64')),
            ('ANOVA_FEATURE_FDR', np.dtype('float64')),
        ])

        # before gdsctools, columns names were a bit different.
        # We need to rename some column names
        self.df.rename(columns={
            'assoc_id': 'ASSOC_ID',
            'Drug id': 'DRUG_ID',
            'Owned_by': 'OWNED_BY',
            'FEATUREpos_IC50_sd': 'FEATURE_pos_IC50_sd',
            'FEATUREneg_IC50_sd': 'FEATURE_neg_IC50_sd',
            'FEATUREpos_Glass_delta': 'FEATURE_pos_Glass_delta',
            'FEATUREneg_Glass_delta': 'FEATURE_neg_Glass_delta',
            'FEATUREpos_logIC50_MEAN': 'FEATURE_pos_logIC50_MEAN',
            'FEATUREneg_logIC50_MEAN': 'FEATURE_neg_logIC50_MEAN',
            'Drug Target': 'DRUG_TARGET',
            'FEATURE_deltaMEAN_IC50': 'FEATURE_delta_MEAN_IC50',
            'FEATURE_ANOVA_pval': 'ANOVA_FEATURE_pval',
            'ANOVA FEATURE FDR %': 'ANOVA_FEATURE_FDR',
            'MSI_ANOVA_pval': 'ANOVA_MSI_pval',
            # must match the name used in mapping and colnames_subset
            'Tissue_ANOVA_pval': 'ANOVA_TISSUE_pval',
            'Drug name': 'DRUG_NAME',
            # NOTE(review): looks like a leftover; kept as is -- confirm
            'A': 'B'}, inplace=True)

        #: columns shown in the HTML table (see :meth:`get_html_table`)
        self.colnames_subset = [
            'ASSOC_ID', 'FEATURE', 'DRUG_ID', 'DRUG_NAME', 'DRUG_TARGET',
            'N_FEATURE_neg', 'N_FEATURE_pos',
            'FEATURE_pos_logIC50_MEAN', 'FEATURE_neg_logIC50_MEAN',
            'FEATURE_delta_MEAN_IC50', 'FEATURE_IC50_effect_size',
            'FEATURE_neg_Glass_delta', 'FEATURE_pos_Glass_delta',
            'ANOVA_FEATURE_pval', 'ANOVA_TISSUE_pval', 'ANOVA_MSI_pval',
            'ANOVA_MEDIA_pval', 'ANOVA_FEATURE_FDR']

        # reset_index returns a new dataframe: the result must be stored
        # otherwise the call has no effect
        self._df = self._df.reset_index(drop=True)

    def astype(self, df):
        """Return *df* with its columns converted to numeric types.

        Each column is passed to :func:`pandas.to_numeric`; if that fails,
        the columns found in :attr:`mapping` are cast one by one to their
        expected type.
        """
        try:
            # does not work in python3.3 on travis but should work
            # with newer pandas versions.
            df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))
        except Exception:
            for col in df.columns:
                if col in self.mapping:
                    df[col] = df[col].astype(self.mapping[col])
        return df

    def _get_df(self):
        return self._df

    def _set_df(self, df):
        # TODO check that all columns are found and with correct type.
        self._df = df
    df = property(_get_df, _set_df, doc="dataframe with all results")

    def to_csv(self, filename):
        """Save dataframe into a file using comma separated values

        :param str filename: output filename; it must end in ``.csv``.
        """
        assert filename.endswith('.csv'), "filename should end in .csv"
        self.df.to_csv(filename, sep=',', index=False)

    def read_csv(self, filename):
        """Read a CSV file

        .. todo:: check validity of the header
        """
        self.reader = readers.Reader(filename)
        self._df = self.reader.df

    def __len__(self):
        return len(self.df)

    def _get_drugIds(self):
        if len(self) == 0:
            return []
        return self.df[self._colname_drug_id].unique()
    drugIds = property(_get_drugIds,
                       doc="Returns the list of drug identifiers")

    def volcano(self, settings=None):
        """Calls :class:`VolcanoANOVA` on the results

        x-value is sign(FEATURE_delta_MEAN_IC50) times
        FEATURE_IC50_effect_size
        y-value is the FDR correction

        :param settings: forwarded to :class:`VolcanoANOVA`. If None, a
            ``settings`` attribute set on the instance is used when there
            is one.

        When there are no results, a message is printed and nothing is
        plotted.
        """
        if settings is None:
            # this class never sets ``settings`` itself; honour it only
            # if it was attached to the instance by the caller
            settings = getattr(self, 'settings', None)
        if len(self.df) == 0:
            print("No data to plot")
            return
        self.handle_volcano = VolcanoANOVA(self.df, settings=settings)
        self.handle_volcano.volcano_plot_all()

    def __str__(self):
        txt = 'Total number of ANOVA tests performed: %s ' % len(self.df)
        return txt

    def __repr__(self):
        txt = 'ANOVAResults (%s tests): ' % len(self.df)
        return txt

    def copy(self):
        """Return a new :class:`ANOVAResults` built on a copy of
        :attr:`df`."""
        return ANOVAResults(self.df.copy())

    def get_html_table(self, collapse_table=False, clip_threshold=2,
                       index=False, header=True, escape=False):
        """Return the results as an HTML table.

        FEATURE, DRUG_ID and ASSOC_ID are turned into links, the effect
        size and Glass delta columns get a background color clipped at
        *clip_threshold* and FEATURE_delta_MEAN_IC50 a background color
        in ``absmax`` mode. Underscores in the column names are replaced
        by spaces.

        :param collapse_table: forwarded to ``HTMLTable.to_html``
        :param clip_threshold: threshold used in ``clip`` mode
        :param index: forwarded to ``HTMLTable.to_html``
        :param header: forwarded to ``HTMLTable.to_html``
        :param escape: forwarded to ``HTMLTable.to_html``
        :return: what ``HTMLTable.to_html`` returns
        """
        cmap_clip = cmap_builder('#ffffff', '#0070FF')
        cmap_absmax = cmap_builder('green', 'white', 'red')

        # The copy is used because we'll change it afterwards
        df = self.df[self.colnames_subset].copy()

        colname = 'ANOVA_FEATURE_FDR'
        df.loc[self.df[colname] < 0.01, colname] = '<0.01'

        # NOTE(review): the table is built from self.df, so the subset
        # computed above is not used -- confirm which one is intended.
        html = HTMLTable(self.df, 'notused')

        # Those columns should be links
        for this in ['FEATURE', 'DRUG_ID', 'ASSOC_ID']:
            html.add_href(this)

        for this in ['FEATURE_IC50_effect_size', 'FEATURE_neg_Glass_delta',
                     'FEATURE_pos_Glass_delta']:
            html.add_bgcolor(this, cmap_clip, mode='clip',
                             threshold=clip_threshold)

        # normalise data and annotate with color
        html.add_bgcolor('FEATURE_delta_MEAN_IC50', cmap_absmax,
                         mode='absmax')

        html.df.columns = [x.replace("_", " ") for x in html.df.columns]
        return html.to_html(escape=escape, header=header, index=index,
                            collapse_table=collapse_table, justify='center')

    def barplot_effect_size(self):
        """Horizontal barplot of the signed IC50 effect sizes.

        The effect size is multiplied by the sign of
        FEATURE_delta_MEAN_IC50 and sorted; negative values are drawn in
        green and the others in red.
        """
        # barplot of the IC50 effect size
        data = np.sign(self.df.FEATURE_delta_MEAN_IC50) * \
            self.df.FEATURE_IC50_effect_size
        data = data.sort_values()
        n_green = len(data[data < 0])
        n_red = len(data[data >= 0])
        print(n_green, n_red)
        # data is sorted, hence the greens come first
        data.plot(kind='barh', width=1, alpha=0.5,
                  color=['green'] * n_green + ['red'] * n_red)
        pylab.xlabel("Effect size")
        pylab.ylabel("Drug name")
def _create_report(self, onweb=True):
    """Fill :attr:`jinja` with the sections of the main report page.

    Besides the jinja entries (summary, volcano plot, MANOVA link, drug,
    feature and COSMIC tables, links to the input files), the method
    writes the companion files: feature/drug summaries and ``results.csv``
    in ``OUTPUT``, the IC50, genomic features, DRUG_DECODE and settings
    files in ``INPUT``, and the ``code/rerun.py`` script.

    :param bool onweb: accepted for interface compatibility; it is not
        used in this method.
    """
    # A summary table
    diag = self.report.diagnostics()
    # NOTE(review): this table is never used (the name is rebound below);
    # kept in case the HTMLTable constructor has side effects -- confirm.
    table = HTMLTable(diag, 'summary')
    lines = []
    for _, row in diag.iterrows():
        if len(row.text) == 0 and len(row.value) == 0:
            lines.append('----<br/>')
        else:
            lines.append(row.text + ": " + str(row.value) + "<br/>")
    self.jinja['summary'] = ''.join(lines)

    print('Creating volcano plots')
    # this can be pretty slow, so only a subset of the associations is
    # kept (1500 and 1500 are the sizes handed to the selector) to get
    # an idea of the distribution
    v = VolcanoANOVA(self.report.df, settings=self.settings)
    v.selector(v.df, 1500, 1500, inplace=True)
    v.volcano_plot_all()
    v.savefig_and_js("volcano_all_js")

    self.jinja['volcano'] = """
        <h3></h3>
        <a href="volcano_all_js.html">
            <img alt="volcano plot for all associations"
                src="volcano_all_js.png">
        </a>
        <br/>
        <p> A javascript version is available
            <a href="volcano_all_js.html">here</a> (
            or click on the image).</p>
    """

    # MANOVA link
    N = len(self.report.get_significant_set())
    self.jinja['manova'] = """
    There were %(N)s significant associations found.
    All significant associations have been gatherered
    in the following link: <br/><a href="manova.html">manova results</a>.
    """ % {'N': N}

    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    # drug summary
    # The list of drugs that could not be analysed is not computed;
    # the corresponding section is left empty.
    not_tested = ""
    self.jinja['drug_not_tested'] = not_tested

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = [x + "-" + get_name(x) for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    # Select the FDR column before averaging: only that column is needed
    # and averaging the whole frame fails on non-numeric columns with
    # recent pandas versions.
    try:
        df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
    except AttributeError:
        # old pandas versions without sort_values
        # note double brackets for python3.3
        df = groups[['ANOVA_FEATURE_FDR']].mean().sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    drug_groups = groups.groups
    df['Number of associations computed'] = [
        len(drug_groups[k]) for k in df.DRUG_ID]

    # number of significant associations (0 if the drug has none)
    groups = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(groups.get(drug, ())) for drug in df['DRUG_ID'].values]

    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID')
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))

    self.jinja['drug_table'] = table.to_html(escape=False,
                                             header=True, index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # old pandas versions without sort_values
        df.sort('FEATURE', inplace=True)

    groups = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(groups.get(feature, ()))
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE')
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False,
                                                header=True, index=False)

    # -------------------------------------- COSMIC table for completeness
    colnames = self.report.gdsc.features._special_names
    df = self.report.gdsc.features.df[colnames]

    # TODO
    # add other columns if possible e.g., GDSC1, GDSC2, TCGA

    df = df.reset_index()
    table = HTMLTable(df)
    url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
    table.add_href('COSMIC_ID', url=url, newtab=True)
    self.jinja['cosmic_table'] = table.to_html()

    # -------------------------------------- settings and INPUT files
    input_dir = self.directory + os.sep + 'INPUT'

    filename = 'ANOVA_input.csv'
    filename = os.sep.join([input_dir, filename])
    self.report.gdsc.ic50.to_csv(filename)
    # the link stored in the page is relative to the report directory
    filename = os.sep.join(['INPUT', 'ANOVA_input.csv'])
    self.jinja['ic50_file'] = filename

    # the genomic features, which may be the default version or
    # one provided by the user. It may have been changed
    gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
    self.report.gdsc.features.to_csv(gf_filename)
    html = """Saved <a href="INPUT/genomic_features.csv">Genomic
              Features</a> file<br/> (possibly the default
              version)."""
    self.jinja['gf_file'] = html

    # Always save DRUG_DECODE file even if empty
    # It may be interpreted in other pipeline or for reproducibility
    output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
    self.report.drug_decode.to_csv(output_filename)
    html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
    if len(self.report.drug_decode) == 0:
        html += 'Note that DRUG_DECODE file was not provided (empty?).'
    self.jinja['drug_decode'] = html

    # Save settings as json file
    filename = os.sep.join([input_dir, 'settings.json'])
    self.settings.to_json(filename)
    filename = os.path.basename(filename)
    self.jinja['settings'] = \
        """Get the settings as a <a href="INPUT/%s">
        json file</a>.""" % filename

    # Save all Results dataframe
    filename = os.sep.join([self.settings.directory, 'OUTPUT',
                            'results.csv'])
    ANOVAResults(self.report.df).to_csv(filename)

    # Script that re-runs the analysis from the saved INPUT files
    code = """from gdsctools import *
import os

def getfile(filename, where='../INPUT'):
    return os.sep.join([where, filename])

# reback the IC50 and genomic features matrices
gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),
        getfile('DRUG_DECODE.csv'))
gdsc.settings.from_json(getfile('settings.json'))
gdsc.init()

# Analyse the data
results = gdsc.anova_all()

# Create the HTML report
r = ANOVAReport(gdsc, results)
r.create_html_pages(onweb=False)"""
    code = code % {
        'ic50': 'ANOVA_input.csv',
        'gf_filename': 'genomic_features.csv'}

    filename = os.sep.join([self.settings.directory, 'code', 'rerun.py'])
    # the context manager closes the file even if the write fails
    with open(filename, 'w') as fh:
        fh.write(code)