def get_html_table(self, collapse_table=False, clip_threshold=2,
                   index=False, header=True, escape=False):
    """Return the association results rendered as a coloured HTML table.

    Only the columns listed in ``self.colnames_subset`` are rendered.
    FDR values below 0.01 are displayed as the string ``'<0.01'``.

    :param collapse_table: forwarded to ``HTMLTable.to_html``
    :param clip_threshold: threshold passed to the ``clip`` colouring mode
        used for the effect size and Glass delta columns
    :param index: forwarded to ``HTMLTable.to_html``
    :param header: forwarded to ``HTMLTable.to_html``
    :param escape: forwarded to ``HTMLTable.to_html``
    :return: the value returned by ``HTMLTable.to_html``
    """
    cmap_clip = cmap_builder('#ffffff', '#0070FF')
    cmap_absmax = cmap_builder('green', 'white', 'red')

    # Work on a copy because the FDR column is edited below
    df = self.df[self.colnames_subset].copy()

    colname = 'ANOVA_FEATURE_FDR'
    small_fdr = df[colname] < 0.01
    if small_fdr.any():
        # Cast explicitly before writing a string: recent pandas versions
        # deprecate the silent float -> object upcast on assignment.
        df[colname] = df[colname].astype(object)
        df.loc[small_fdr, colname] = '<0.01'

    # Build the table from the edited copy. Passing self.df here (as was
    # done before) silently discarded both the column subset and the
    # '<0.01' labels.
    html = HTMLTable(df, 'notused')

    # Those columns should be links
    for this in ['FEATURE', 'DRUG_ID', 'ASSOC_ID']:
        html.add_href(this)

    for this in ['FEATURE_IC50_effect_size', 'FEATURE_neg_Glass_delta',
                 'FEATURE_pos_Glass_delta']:
        html.add_bgcolor(this, cmap_clip, mode='clip',
                         threshold=clip_threshold)

    # normalise data and annotate with color
    html.add_bgcolor('FEATURE_delta_MEAN_IC50', cmap_absmax, mode='absmax')

    # Column titles are shown with spaces instead of underscores
    html.df.columns = [x.replace("_", " ") for x in html.df.columns]
    return html.to_html(escape=escape, header=header, index=index,
                        collapse_table=collapse_table, justify='center')
def get_html_table(self, collapse_table=False, clip_threshold=2,
                   index=False, header=True, escape=False, add_href=True):
    """Return an HTML table for the reports

    Only the columns listed in ``self.colnames_subset`` are rendered.
    FDR values below 0.01 are displayed as the string ``'<0.01'``.

    :param collapse_table: forwarded to ``HTMLTable.to_html``
    :param clip_threshold: threshold passed to the ``clip`` colouring mode
        used for the effect size and Glass delta columns
    :param index: forwarded to ``HTMLTable.to_html``
    :param header: forwarded to ``HTMLTable.to_html``
    :param escape: forwarded to ``HTMLTable.to_html``
    :param add_href: add href to the FEATURE, DRUG ID and ASSOC ID
    :return: the value returned by ``HTMLTable.to_html``
    """
    cmap_clip = cmap_builder('#ffffff', '#0070FF')
    cmap_absmax = cmap_builder('green', 'white', 'red')

    # Work on a copy because columns are edited below
    df = self.df[self.colnames_subset].copy()

    colname = 'ANOVA_FEATURE_FDR'
    small_fdr = df[colname] < 0.01
    if small_fdr.any():
        # Cast explicitly before writing a string: recent pandas versions
        # deprecate the silent float -> object upcast on assignment.
        df[colname] = df[colname].astype(object)
        df.loc[small_fdr, colname] = '<0.01'

    # In the assoc column, we remove the "a" letter so that the column is
    # properly sorted by Id; the link itself is built with "a" as prefix
    # (see add_href below).
    df['ASSOC_ID'] = df['ASSOC_ID'].apply(
        lambda x: int(str(x).replace("a", "")))

    html = HTMLTable(df, 'notused')

    # Those columns should be links
    if add_href:
        html.add_href("FEATURE")
        # here url works like a prefix
        html.add_href("ASSOC_ID", url="a", suffix=".html")
        # here url works like a prefix
        html.add_href("DRUG_ID", url="drug_", suffix=".html")

    for this in ['FEATURE_IC50_effect_size', 'FEATURE_neg_Glass_delta',
                 'FEATURE_pos_Glass_delta']:
        html.add_bgcolor(this, cmap_clip, mode='clip',
                         threshold=clip_threshold)

    # normalise data and annotate with color
    html.add_bgcolor('FEATURE_delta_MEAN_IC50', cmap_absmax, mode='absmax')

    # Column titles are shown with spaces instead of underscores
    html.df.columns = [x.replace("_", " ") for x in html.df.columns]
    return html.to_html(escape=escape, header=header, index=index,
                        collapse_table=collapse_table, justify='center')
def _create_report(self, onweb=True):
    """Append the regression report sections to ``self.jinja['sections']``.

    Sections are appended in this order: the regression method name, the
    results table read from ``<self.caller.prefix>results.csv``, the static
    scatter plot image and finally the canvas used by the scatter
    javascript registered through ``self._set_scatter()``.

    :param onweb: not used by this method
    """
    section = """<div> <b>Regression method:</b> %s </br> </div><hr> """ % self.caller.method
    self.jinja['sections'].append(section)

    # The main CSV table with links to each drug page
    filename = self.caller.prefix + "results.csv"
    df = pd.read_csv(filename)
    # Extra column holding the -log10 of the ttest column (original note:
    # prevents inf to fail in the HTMLTable)
    df['ttest (-log10)'] = -pylab.log10(df['ttest'])

    table = HTMLTable(df)
    table.add_bgcolor('bayes')
    table.add_bgcolor('Rp')
    table.df['drugid'] = ['<a href="drug_%s.html">%s</a>' % (x, x)
                          for x in table.df['drugid']]

    html = ("<div><p>This table contains links to all drugs. The Rp columns"
            " contains the coefficient of correlation found with the"
            " method for the give alpha parameter. The ln_alpha column"
            " is just the -log10(alpha) value. The bayes and ttest columns"
            " gives an idea of the significance of the correlation as "
            " compared to a null distribution.</p>")
    html += table.to_html(index=False) + "</div>"
    self.jinja['sections'].append(html)

    # The static scatter plot image
    filename = self.caller.prefix + "scatter_plot.png"
    html = "<hr><div>"
    html += "<img src=%s></img>" % filename
    # Close the div properly; the previous "/<div>" emitted a stray slash
    # and opened a second, never closed, div.
    html += "</div>"
    self.jinja['sections'].append(html)

    # The scatter plot. First the javascript in the header
    self._set_scatter()
    # and the section itself
    html = (
        """ <div class="wrap"> <div class="content"> <center> """
        """<canvas id='canvasVolcano' width='800' height='540'></canvas> """
        """</center> </div> <div class="clear"> </div> </div> """
    )
    self.jinja["sections"].append(html)
def test_htmltable():
    """Smoke test: build an HTMLTable with a link column and three
    coloured columns, then print the rendered HTML."""
    columns = {
        'A': [1, 2, 10],
        'B': [1, 10, 2],
        'C': [1, 10, 2],
        'url': ['A', 'B', 'C'],
    }
    table = HTMLTable(pd.DataFrame(columns), 'test')
    table.add_href('url')

    # One colouring per numeric column: default mode, clip and max
    colourings = (
        ('A', {}),
        ('B', {'mode': 'clip', 'threshold': 2}),
        ('C', {'mode': 'max', 'threshold': 2}),
    )
    for column, options in colourings:
        table.add_bgcolor(column, **options)

    rendered = table.to_html()
    print(rendered)
def test_htmltable():
    """Smoke test: render a small HTMLTable using a link column and the
    default, ``clip`` and ``max`` background colour modes."""
    frame = pd.DataFrame(dict(
        A=[1, 2, 10],
        B=[1, 10, 2],
        C=[1, 10, 2],
        url=['A', 'B', 'C'],
    ))
    table = HTMLTable(frame, 'test')
    table.add_href('url')

    # Default colouring first, then the two thresholded modes
    table.add_bgcolor('A')
    for column, mode in (('B', 'clip'), ('C', 'max')):
        table.add_bgcolor(column, mode=mode, threshold=2)

    print(table.to_html())
def add_features(self):
    """Export the feature/drug summaries and build the drug and feature tables.

    Writes ``OUTPUT/features_summary.csv`` and ``OUTPUT/drugs_summary.csv``
    under ``self.directory`` and fills the ``drug_not_tested``,
    ``drug_table`` and ``feature_table`` entries of ``self.jinja``.
    When ``self.report.df`` is empty the method returns right after the
    CSV files are written, without creating the two tables.
    """
    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    self.jinja['drug_not_tested'] = ""

    # drug summary; the index shows "<id>-<name>" when a decoder is available
    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = ["{}-{}".format(x, get_name(x))
                          for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    if len(self.report.df) == 0:
        return

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        # Select the column before averaging: averaging every column fails
        # on non-numeric ones with recent pandas versions.
        df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
    except AttributeError:
        # Very old pandas without sort_values
        # (note the double brackets, kept from the original fallback)
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    all_groups = groups.groups
    df['Number of associations computed'] = [len(all_groups[k])
                                             for k in df.DRUG_ID]

    # number of significant associations per drug (0 when none)
    significant = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(significant[drug]) if drug in significant else 0
                  for drug in df['DRUG_ID'].values]

    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID', url="associations/drug_", suffix=".html")
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns
    ]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['drug_table'] = table.to_html(escape=False, header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # Very old pandas without sort_values
        df.sort('FEATURE', inplace=True)

    significant = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(significant[feature]) if feature in significant else 0
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE', url="associations/", suffix=".html")
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False, header=True,
                                                index=False)
def _create_report(self, onweb=True):
    """Append the regression report sections to ``self.jinja['sections']``.

    Three sections are added: the regression method name, the results
    table (read from ``<self.caller.prefix_data>results.csv``) together
    with a download link, and the canvas used by the scatter javascript
    registered through ``self._set_scatter()``.

    :param onweb: not used by this method
    """
    sections = self.jinja['sections']

    # The top section with standard information
    sections.append(
        """<div> <b>Regression method:</b> %s </br> </div><hr> """
        % self.caller.method)

    # The main CSV tables with bayes factor and links to each drug ID
    csv_name = self.caller.prefix_data + "results.csv"
    results = pd.read_csv(csv_name)
    # Extra column holding the -log10 of the ttest column (original note:
    # prevents inf to fail in the HTMLTable)
    results['ttest (-log10)'] = -pylab.log10(results['ttest'])

    table = HTMLTable(results)
    for column in ('bayes', 'Rp'):
        table.add_bgcolor(column)
    links = []
    for drug in table.df['drugid']:
        links.append('<a href="drug_%s.html">%s</a>' % (drug, drug))
    table.df['drugid'] = links

    description = "".join([
        "<div><p>This table contains links to all drugs (first column).",
        " The Rp column contains the coefficient of correlation",
        " (pearson) found with the regression method for the alpha",
        " parameter provided in column 3. The alpha value is the optimised",
        " value obtained using a cross validation (see below).",
        " The ln_alpha column is just the -log10(alpha) value. The bayes",
        " factor gives an idea of the significance of the correlation as ",
        " compared to a null distribution. See",
        ' <a href="http://gdsctools.readthedocs.io/en/master/references.html">',
        'gdstools documentation.</a> for details.',
        "<br>",
        " Note also that the optimisation of the alpha parameter is",
        " performed using a cross validation and depends on a few",
        " parameters such as the range of alpha values, number of ",
        " cross validation, ....</p>",
    ])
    body = description + table.to_html(index=False)
    wrapper = '<div>%s <p>Download the CSV <a href="%s">file</a></p></div><hr>'
    sections.append(wrapper % (body, csv_name))

    # The scatter plot. First the javascript in the header
    self._set_scatter()
    # and the section itself
    canvas = (
        """ <div class="wrap"> <div class="content"> <center> """
        """<canvas id='canvasVolcano' width='800' height='540'></canvas> """
        """</center> </div> <div class="clear"> </div> </div> """
    )
    self.jinja["sections"].append(canvas)
def add_features(self):
    """Export the feature/drug summaries and build the drug and feature tables.

    Writes ``OUTPUT/features_summary.csv`` and ``OUTPUT/drugs_summary.csv``
    under ``self.directory`` and fills the ``drug_not_tested``,
    ``drug_table`` and ``feature_table`` entries of ``self.jinja``.
    When ``self.report.df`` is empty the method returns right after the
    CSV files are written, without creating the two tables.
    """
    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    self.jinja['drug_not_tested'] = ""

    # drug summary; the index shows "<id>-<name>" when a decoder is available
    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        df_drugs.index = ["{}-{}".format(x, get_name(x))
                          for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    if len(self.report.df) == 0:
        return

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        # Select the column before averaging: averaging every column fails
        # on non-numeric ones with recent pandas versions.
        df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
    except AttributeError:
        # Very old pandas without sort_values
        # (note the double brackets, kept from the original fallback)
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    all_groups = groups.groups
    df['Number of associations computed'] = [len(all_groups[k])
                                             for k in df.DRUG_ID]

    # number of significant associations per drug (0 when none)
    significant = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(significant[drug]) if drug in significant else 0
                  for drug in df['DRUG_ID'].values]

    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID', url="associations/drug_", suffix=".html")
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns
    ]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['drug_table'] = table.to_html(escape=False, header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # Very old pandas without sort_values
        df.sort('FEATURE', inplace=True)

    significant = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(significant[feature]) if feature in significant else 0
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE', url="associations/", suffix=".html")
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False, header=True,
                                                index=False)
def _create_report(self, onweb=True):
    """Fill ``self.jinja`` for the main ANOVA page and export the data files.

    The method sets the ``summary``, ``volcano``, ``manova``,
    ``drug_not_tested``, ``drug_table``, ``feature_table``,
    ``cosmic_table``, ``ic50_file``, ``gf_file``, ``drug_decode`` and
    ``settings`` entries of ``self.jinja``. It also writes the summary CSV
    files under ``OUTPUT``, the input files and settings under ``INPUT``,
    the complete results under ``OUTPUT/results.csv`` and a ``rerun.py``
    script under ``code``.

    :param onweb: not used by this method
    """
    # A summary made of one "text: value" line per diagnostic
    diag = self.report.diagnostics()
    lines = []
    for _, row in diag.iterrows():
        if len(row.text) == 0 and len(row.value) == 0:
            lines.append('----<br/>')
        else:
            lines.append(row.text + ": " + str(row.value) + "<br/>")
    self.jinja['summary'] = "".join(lines)

    print('Creating volcano plots')
    # this can be pretty slow, so the selector is asked to reduce the data
    # (1500, 1500) to get an idea of the distribution
    v = VolcanoANOVA(self.report.df, settings=self.settings)
    v.selector(v.df, 1500, 1500, inplace=True)
    v.volcano_plot_all()
    v.savefig_and_js("volcano_all_js")

    self.jinja['volcano'] = (
        """ <h3></h3> <a href="volcano_all_js.html"> """
        """<img alt="volcano plot for all associations" """
        """src="volcano_all_js.png"> </a> <br/> """
        """<p> A javascript version is available """
        """<a href="volcano_all_js.html">here</a> """
        """( or click on the image).</p> """
    )

    # MANOVA link
    N = len(self.report.get_significant_set())
    self.jinja['manova'] = (
        """ There were %(N)s significant associations found. """
        """All significant associations have been gatherered """
        """in the following link: """
        """<br/><a href="manova.html">manova results</a>. \n"""
    ) % {'N': N}

    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    # drug summary
    # Listing the drugs that were not analysed is currently disabled: the
    # template always receives an empty string.
    self.jinja['drug_not_tested'] = ""

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        # format() also copes with drug identifiers that are not strings
        df_drugs.index = ["{}-{}".format(x, get_name(x))
                          for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        # Select the column before averaging: averaging every column fails
        # on non-numeric ones with recent pandas versions.
        df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
    except AttributeError:
        # Very old pandas without sort_values
        # (note the double brackets, kept from the original fallback)
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    all_groups = groups.groups
    df['Number of associations computed'] = [len(all_groups[k])
                                             for k in df.DRUG_ID]

    # number of significant associations per drug (0 when none)
    significant = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(significant[drug]) if drug in significant else 0
                  for drug in df['DRUG_ID'].values]

    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID')
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns
    ]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['drug_table'] = table.to_html(escape=False, header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # Very old pandas without sort_values
        df.sort('FEATURE', inplace=True)

    significant = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(significant[feature]) if feature in significant else 0
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE')
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False, header=True,
                                                index=False)

    # -------------------------------------- COSMIC table for completeness
    colnames = self.report.gdsc.features._special_names
    df = self.report.gdsc.features.df[colnames]
    # TODO
    # add other columns if possible e.g., GDSC1, GDSC2, TCGA
    df = df.reset_index()
    table = HTMLTable(df)
    url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
    table.add_href('COSMIC_ID', url=url, newtab=True)
    self.jinja['cosmic_table'] = table.to_html()

    # -------------------------------------- settings and INPUT files
    input_dir = self.directory + os.sep + 'INPUT'
    filename = os.sep.join([input_dir, 'ANOVA_input.csv'])
    self.report.gdsc.ic50.to_csv(filename)
    # the page links to the file relative to the report directory
    self.jinja['ic50_file'] = os.sep.join(['INPUT', 'ANOVA_input.csv'])

    # the genomic features, which may be the default version or
    # one provided by the user. It may have been changed
    gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
    self.report.gdsc.features.to_csv(gf_filename)
    self.jinja['gf_file'] = (
        """Saved <a href="INPUT/genomic_features.csv">"""
        """Genomic Features</a> file<br/> (possibly the default version)."""
    )

    # Always save DRUG_DECODE file even if empty
    # It may be interpreted in other pipeline or for reproducibility
    output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
    self.report.drug_decode.to_csv(output_filename)
    html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
    if len(self.report.drug_decode) == 0:
        html += 'Note that DRUG_DECODE file was not provided (empty?).'
    self.jinja['drug_decode'] = html

    # Save settings as json file
    filename = os.sep.join([input_dir, 'settings.json'])
    self.settings.to_json(filename)
    filename = os.path.basename(filename)
    self.jinja['settings'] = \
        """Get the settings as a <a href="INPUT/%s"> json file</a>.""" % filename

    # Save all Results dataframe
    filename = os.sep.join([self.settings.directory, 'OUTPUT',
                            'results.csv'])
    ANOVAResults(self.report.df).to_csv(filename)

    # Script that reproduces the analysis from the saved INPUT files. The
    # lines are joined with explicit newlines so the written file is a
    # valid Python script.
    code = "\n".join([
        "from gdsctools import *",
        "import os",
        "",
        "def getfile(filename, where='../INPUT'):",
        "    return os.sep.join([where, filename])",
        "",
        "# reback the IC50 and genomic features matrices",
        "gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),",
        "        getfile('DRUG_DECODE.csv'))",
        "gdsc.settings.from_json(getfile('settings.json'))",
        "gdsc.init()",
        "",
        "# Analyse the data",
        "results = gdsc.anova_all()",
        "",
        "# Create the HTML report",
        "r = ANOVAReport(gdsc, results)",
        "r.create_html_pages(onweb=False)",
    ])
    code = code % {
        'ic50': 'ANOVA_input.csv',
        'gf_filename': 'genomic_features.csv',
    }

    filename = os.sep.join([self.settings.directory, 'code', 'rerun.py'])
    # the context manager closes the file even if the write fails
    with open(filename, 'w') as fh:
        fh.write(code)
def _create_report(self, onweb=True):
    """Fill ``self.jinja`` for the main ANOVA page and export the data files.

    The method sets the ``summary``, ``volcano``, ``manova``,
    ``drug_not_tested``, ``drug_table``, ``feature_table``,
    ``cosmic_table``, ``ic50_file``, ``gf_file``, ``drug_decode`` and
    ``settings`` entries of ``self.jinja``. It also writes the summary CSV
    files under ``OUTPUT``, the input files and settings under ``INPUT``,
    the complete results under ``OUTPUT/results.csv`` and a ``rerun.py``
    script under ``code``.

    :param onweb: not used by this method
    """
    # A summary made of one "text: value" line per diagnostic
    diag = self.report.diagnostics()
    lines = []
    for _, row in diag.iterrows():
        if len(row.text) == 0 and len(row.value) == 0:
            lines.append('----<br/>')
        else:
            lines.append(row.text + ": " + str(row.value) + "<br/>")
    self.jinja['summary'] = "".join(lines)

    print('Creating volcano plots')
    # this can be pretty slow, so the selector is asked to reduce the data
    # (1500, 1500) to get an idea of the distribution
    v = VolcanoANOVA(self.report.df, settings=self.settings)
    v.selector(v.df, 1500, 1500, inplace=True)
    v.volcano_plot_all()
    v.savefig_and_js("volcano_all_js")

    self.jinja['volcano'] = (
        """ <h3></h3> <a href="volcano_all_js.html"> """
        """<img alt="volcano plot for all associations" """
        """src="volcano_all_js.png"> </a> <br/> """
        """<p> A javascript version is available """
        """<a href="volcano_all_js.html">here</a> """
        """( or click on the image).</p> """
    )

    # MANOVA link
    N = len(self.report.get_significant_set())
    self.jinja['manova'] = (
        """ There were %(N)s significant associations found. """
        """All significant associations have been gatherered """
        """in the following link: """
        """<br/><a href="manova.html">manova results</a>. \n"""
    ) % {'N': N}

    # feature summary
    df_features = self.report.feature_summary("feature_summary.png")
    filename = 'OUTPUT' + os.sep + 'features_summary.csv'
    df_features.to_csv(self.directory + os.sep + filename, sep=',')

    # drug summary
    # Listing the drugs that were not analysed is currently disabled: the
    # template always receives an empty string.
    self.jinja['drug_not_tested'] = ""

    df_drugs = self.report.drug_summary(filename="drug_summary.png")
    get_name = self.report.drug_decode.get_name
    if len(self.report.drug_decode.df) > 0:
        # format() also copes with drug identifiers that are not strings
        df_drugs.index = ["{}-{}".format(x, get_name(x))
                          for x in df_drugs.index]
    filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
    df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

    # --------------------------- Create table with links to all drugs
    groups = self.report.df.groupby('DRUG_ID')
    try:
        # Select the column before averaging: averaging every column fails
        # on non-numeric ones with recent pandas versions.
        df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
    except AttributeError:
        # Very old pandas without sort_values
        # (note the double brackets, kept from the original fallback)
        df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
    df = df.reset_index()  # get back the Drug id in the dframe columns

    # let us add also the drug name
    df = self.report.drug_decode.drug_annotations(df)

    # let us also add number of associations computed
    all_groups = groups.groups
    df['Number of associations computed'] = [len(all_groups[k])
                                             for k in df.DRUG_ID]

    # number of significant associations per drug (0 when none)
    significant = self.report.get_significant_set().groupby('DRUG_ID').groups
    df['hits'] = [len(significant[drug]) if drug in significant else 0
                  for drug in df['DRUG_ID'].values]

    table = HTMLTable(df, 'drugs')
    table.add_href('DRUG_ID')
    table.df.columns = [
        x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
        for x in table.df.columns
    ]
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['drug_table'] = table.to_html(escape=False, header=True,
                                             index=False)

    # ---------------------- Create full table with links to all features
    df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
    try:
        df.sort_values(by='FEATURE', inplace=True)
    except AttributeError:
        # Very old pandas without sort_values
        df.sort('FEATURE', inplace=True)

    significant = self.report.get_significant_set().groupby('FEATURE').groups
    df['hits'] = [len(significant[feature]) if feature in significant else 0
                  for feature in df['FEATURE'].values]

    table = HTMLTable(df, 'features')
    table.sort('hits', ascending=False)
    table.add_href('FEATURE')
    table.add_bgcolor('hits', mode='max',
                      cmap=cmap_builder('white', 'orange', 'red'))
    self.jinja['feature_table'] = table.to_html(escape=False, header=True,
                                                index=False)

    # -------------------------------------- COSMIC table for completeness
    colnames = self.report.gdsc.features._special_names
    df = self.report.gdsc.features.df[colnames]
    # TODO
    # add other columns if possible e.g., GDSC1, GDSC2, TCGA
    df = df.reset_index()
    table = HTMLTable(df)
    url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
    table.add_href('COSMIC_ID', url=url, newtab=True)
    self.jinja['cosmic_table'] = table.to_html()

    # -------------------------------------- settings and INPUT files
    input_dir = self.directory + os.sep + 'INPUT'
    filename = os.sep.join([input_dir, 'ANOVA_input.csv'])
    self.report.gdsc.ic50.to_csv(filename)
    # the page links to the file relative to the report directory
    self.jinja['ic50_file'] = os.sep.join(['INPUT', 'ANOVA_input.csv'])

    # the genomic features, which may be the default version or
    # one provided by the user. It may have been changed
    gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
    self.report.gdsc.features.to_csv(gf_filename)
    self.jinja['gf_file'] = (
        """Saved <a href="INPUT/genomic_features.csv">"""
        """Genomic Features</a> file<br/> (possibly the default version)."""
    )

    # Always save DRUG_DECODE file even if empty
    # It may be interpreted in other pipeline or for reproducibility
    output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
    self.report.drug_decode.to_csv(output_filename)
    html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
    if len(self.report.drug_decode) == 0:
        html += 'Note that DRUG_DECODE file was not provided (empty?).'
    self.jinja['drug_decode'] = html

    # Save settings as json file
    filename = os.sep.join([input_dir, 'settings.json'])
    self.settings.to_json(filename)
    filename = os.path.basename(filename)
    self.jinja['settings'] = \
        """Get the settings as a <a href="INPUT/%s"> json file</a>.""" % filename

    # Save all Results dataframe
    filename = os.sep.join([self.settings.directory, 'OUTPUT',
                            'results.csv'])
    ANOVAResults(self.report.df).to_csv(filename)

    # Script that reproduces the analysis from the saved INPUT files. The
    # lines are joined with explicit newlines so the written file is a
    # valid Python script.
    code = "\n".join([
        "from gdsctools import *",
        "import os",
        "",
        "def getfile(filename, where='../INPUT'):",
        "    return os.sep.join([where, filename])",
        "",
        "# reback the IC50 and genomic features matrices",
        "gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),",
        "        getfile('DRUG_DECODE.csv'))",
        "gdsc.settings.from_json(getfile('settings.json'))",
        "gdsc.init()",
        "",
        "# Analyse the data",
        "results = gdsc.anova_all()",
        "",
        "# Create the HTML report",
        "r = ANOVAReport(gdsc, results)",
        "r.create_html_pages(onweb=False)",
    ])
    code = code % {
        'ic50': 'ANOVA_input.csv',
        'gf_filename': 'genomic_features.csv',
    }

    filename = os.sep.join([self.settings.directory, 'code', 'rerun.py'])
    # the context manager closes the file even if the write fails
    with open(filename, 'w') as fh:
        fh.write(code)