def _create_main_index(self):
    """Write the top-level ``index.html`` listing every company.

    A one-column table ("Company") is built from ``self.companies``; each
    cell is turned into a link of the form
    ``company_packages/<company>/index.html`` that opens in a new tab.
    The page is rendered in the current directory from the
    ``main_summary.html`` template.
    """
    # We could also add a column with number of association ?
    company_frame = pd.DataFrame({"Company": self.companies[:]})

    page = ReportMain(
        directory=".",
        filename='index.html',
        template_filename='main_summary.html',
        mode="summary",
    )

    table = HTMLTable(company_frame)
    table.add_href(
        'Company',
        newtab=True,
        url="company_packages/",
        suffix="/index.html",
    )

    # Values handed over to the template.
    template_values = (
        ('data_table', table.to_html(collapse_table=False)),
        ('analysis_domain', "All companies / All "),
        ('tissue_directory', self.main_directory),
    )
    for key, value in template_values:
        page.jinja[key] = value

    page.write()
def _create_main_index(self):
    """Write the top-level ``index.html`` listing every company.

    A one-column table ("Company") is built from ``self.companies``; each
    cell is turned into a link of the form
    ``company_packages/<company>/index.html`` that opens in a new tab.
    The page is rendered in the current directory from the
    ``main_summary.html`` template.
    """
    # We could also add a column with number of association ?
    company_frame = pd.DataFrame({"Company": self.companies[:]})

    page = ReportMain(
        directory=".",
        filename='index.html',
        template_filename='main_summary.html',
        mode="summary",
    )

    table = HTMLTable(company_frame)
    table.add_href(
        'Company',
        newtab=True,
        url="company_packages/",
        suffix="/index.html",
    )

    # Values handed over to the template.
    template_values = (
        ('data_table', table.to_html(collapse_table=False)),
        ('analysis_domain', "All companies / All "),
        ('tissue_directory', self.main_directory),
    )
    for key, value in template_values:
        page.jinja[key] = value

    page.write()
def _create_summary_pages(self, main_directory, verbose=True, company=None):
    """Create the summary ``index.html`` of the analyses in a directory.

    Each sub-directory of *main_directory* whose name is listed in
    ``self.tcga`` contributes one row: the total number of hits read from
    ``OUTPUT/drugs_summary.csv`` and the number of involved proprietary /
    public compounds, looked up in ``INPUT/DRUG_DECODE.csv``. When the
    summary CSV cannot be read, the row only holds the analysis name.

    :param main_directory: directory that contains one sub-directory per
        analysis.
    :param verbose: if True, print each directory that is processed.
    :param company: optional name stored as ``collaborator`` in the page
        template data.
    :return: the summary :class:`pandas.DataFrame`, sorted by decreasing
        number of hits.
    """
    columns = [
        'Analysis name', 'Number of hits',
        'Number of involved proprietary compounds', 'out of',
        'Number of involved public', 'out of'
    ]

    # self.tcga may be a computed property: evaluate it once.
    known_tcga = self.tcga

    summary = []
    # Read all directories in main_directory
    for directory in sorted(glob.glob(main_directory + os.sep + '*')):
        tcga = directory.split(os.sep)[-1]
        if tcga not in known_tcga:
            continue
        if verbose:
            print(directory, tcga)

        # number of hits
        path = directory + os.sep + 'OUTPUT' + os.sep
        try:
            hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
        except Exception:
            # Best effort: keep a placeholder row for this analysis.
            summary.append([tcga] + [None] * 5)
            continue

        total_hits = hits.total.sum()
        drug_involved = hits['Unnamed: 0'].unique()

        # where to find the DRUG DECODE file. Should have been copied
        path = directory + os.sep + 'INPUT' + os.sep
        drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
        info = drug_decode.get_info()

        # reindex() is a label-based lookup that yields NaN rows for
        # unknown identifiers; such rows are counted as non public.
        # NOTE(review): assumes the DrugDecode index has no duplicates.
        webrelease = drug_decode.df.reindex(drug_involved).WEBRELEASE
        drug_inv_public = sum(webrelease == 'Y')
        drug_inv_prop = sum(webrelease != 'Y')

        summary.append([
            tcga, total_hits, drug_inv_prop, info['N_prop'],
            drug_inv_public, info['N_public']
        ])

    # Naming the columns at construction time also works when no analysis
    # directory was found (empty summary).
    df = pd.DataFrame(summary, columns=columns)

    try:
        df.sort_values(by="Number of hits", ascending=False, inplace=True)
    except AttributeError:
        # Older pandas releases have no DataFrame.sort_values.
        df.sort("Number of hits", ascending=False, inplace=True)

    self.html_page = ReportMain(directory=main_directory,
                                filename='index.html',
                                template_filename='datapack_summary.html',
                                mode="summary")

    # Let us use our HTMLTable to add the HTML references
    self.html_table = HTMLTable(df)
    self.html_table.add_href('Analysis name', newtab=True, url=None,
                             suffix='/index.html')
    self.html_table.add_bgcolor('Number of hits')

    self.html_page.jinja['data_table'] = self.html_table.to_html(
        collapse_table=False)
    if company:
        self.html_page.jinja["collaborator"] = company
    self.html_page.write()

    return df
class GDSC(GDSCBase):
    """Wrapper of the :class:`~gdsctools.anova.ANOVA` class and reports to
    analyse all TCGA Tissues and companies automatically while creating
    summary HTML pages.

    First, one needs to provide a unique IC50 file.

    Second, the DrugDecode file (see :class:`~gdsctools.readers.DrugDecode`)
    must be provided with the DRUG identifiers and their corresponding
    names.

    Third, a set of genomic feature files must be provided for each
    :term:`TCGA` tissue.

    You then create a GDSC instance::

        from gdsctools import GDSC
        gg = GDSC('IC50_v18.csv', 'DRUG_DECODE.txt',
            genomic_feature_pattern='GF*csv')

    At that stage you may want to change the settings, e.g::

        gg.settings.FDR_threshold = 20

    Then run the analysis::

        gg.analyse()

    This will launch an ANOVA analysis for each TCGA tissue + PANCAN case
    if provided. This will also create a data package for each tissue.
    The data packages are stored in ./tissue_packages directory.

    Since all private and public drugs are stored together, the next
    step is to create data packages for each company::

        gg.create_data_packages_for_companies()

    you may select a specific one if you wish::

        gg.create_data_packages_for_companies(['AZ'])

    Finally, create some summary pages::

        gg.create_summary_pages()

    Your entry point is an HTML file called **index.html**

    """

    def __init__(self, ic50, drug_decode, genomic_feature_pattern="GF_*csv",
                 main_directory="tissue_packages", verbose=True):
        """.. rubric:: Constructor

        :param ic50: an :class:`~gdsctools.readers.IC50` file.
        :param drug_decode: an :class:`~gdsctools.readers.DrugDecode` file.
        :param genomic_feature_pattern: a glob to a set of
            :class:`~gdsctools.readers.GenomicFeature` files.
        :param main_directory: directory where the tissue data packages
            are written.
        :param verbose: forwarded to the base class constructor.
        """
        super(GDSC, self).__init__(genomic_feature_pattern, verbose=verbose)

        # The IC50 file is re-read for every analysis, so a filename (not
        # an already loaded object) is required here.
        assert isinstance(ic50, str)
        self.ic50_filename = ic50
        self.dd_filename = drug_decode
        self.main_directory = main_directory

        self.settings = ANOVASettings()
        self.settings.animate = False
        self.drug_decode = DrugDecode(drug_decode)

        print("Those settings will be used (check FDR_threshold)")
        print(self.settings)

        # ANOVA results of each analysis, keyed by TCGA name
        self.results = {}

        self.company_directory = "company_packages"

        # quick test on 15 features
        self.test = False

    @staticmethod
    def _tcga_name(gf_filename):
        """Return the TCGA label found in a genomic feature filename.

        The label is the second underscore-separated field of the name,
        cut at its first dot (``GF_BRCA.csv`` gives ``BRCA``).
        """
        return gf_filename.split("_")[1].split(".")[0]

    def _read_ic50(self, verbose=False):
        """Load the IC50 file, falling back on :class:`IC50Cluster`.

        :param verbose: if True, print a message when the fallback reader
            is used.
        """
        try:
            return IC50(self.ic50_filename)
        except Exception:
            if verbose:
                print("Clustering IC50 (v18 released data ?)")
            return IC50Cluster(self.ic50_filename, verbose=False)

    def analyse(self, multicore=None):
        """Launch ANOVA analysis and create a data package for each tissue.

        Reports are created but the HTML pages are not opened.

        :param multicore: number of cpu to use (1 by default)
        """
        self.mkdir(self.main_directory)

        # First analyse all TCGA cases + PANCAN once for all and
        # store all the results in a dictionary.
        self._analyse_all(multicore=multicore)

    def _analyse_all(self, multicore=None):
        """Run the ANOVA and build the HTML report of every feature file.

        Results are stored in ``self.results`` under the TCGA name.

        :param multicore: forwarded to ``ANOVA.anova_all``.
        """
        for gf_filename in sorted(self.gf_filenames):
            tcga = self._tcga_name(gf_filename)
            print(purple('======================== Analysing %s data' % tcga))

            self.mkdir(self.main_directory + os.sep + tcga)

            # Computes the ANOVA
            self.ic50 = self._read_ic50(verbose=True)
            an = ANOVA(self.ic50, gf_filename, self.drug_decode,
                       verbose=False)

            if self.test is True:
                an.features.df = an.features.df[an.features.df.columns[0:15]]

            self.an = an
            an.settings = ANOVASettings(**self.settings)
            an.settings.analysis_type = tcga
            an.init()  # This reset the directory

            results = an.anova_all(multicore=multicore)
            an.settings.directory = self.main_directory + os.sep + tcga

            # Store the results
            self.results[tcga] = results

            print('Analysing %s data and creating images' % tcga)
            self.report = ANOVAReport(an)
            self.report.settings.savefig = True
            self.report.create_html_pages(onweb=False)

    def create_data_packages_for_companies(self, companies=None):
        """Creates a data package for each company found in the DrugDecode
        file

        :param companies: a company name, a list of company names, or None
            to use all companies (see :attr:`companies`).
        :raises ValueError: if there is no company to process.
        """
        ##########################################################
        #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
        #                                                        #
        # DRUG_DECODE and IC50 inputs must be filtered to keep   #
        # only WEBRELEASE=Y and owner                            #
        #                                                        #
        #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
        ##########################################################

        # companies must be just one name (one string) or a list of strings
        # By default, takes all companies found in DrugDecode
        if isinstance(companies, str):
            companies = [companies]

        if companies is None:
            companies = self.companies

        if len(companies) == 0:
            raise ValueError(
                "Could not find any companies in the DrugDecode file")

        # The main directory
        self.mkdir(self.company_directory)

        # Loop over all companies, retrieving information built
        # in analyse() method, selecting for each TCGA all information
        # for that company only (and public drugs)
        Ncomp = len(companies)
        for ii, company in enumerate(companies):
            print(purple("\n=========== Analysing company %s out of %s (%s)" %
                         (ii + 1, Ncomp, company)))
            self.mkdir(self.company_directory + os.sep + company)

            # Handle each TCGA case separately
            for gf_filename in sorted(self.gf_filenames):
                tcga = self._tcga_name(gf_filename)
                print(brown("  ------- building TCGA %s sub directory" % tcga))

                # Read the results previously computed, either from
                # memory or from the tissue data package
                try:
                    results_df = self.results[tcga].df.copy()
                except Exception:
                    results_path = "%s/%s/OUTPUT/results.csv" % (
                        self.main_directory, tcga)
                    results_df = ANOVAResults(results_path)

                # Make sure the results are formatted correctly
                results = ANOVAResults(results_df)

                # Get the DrugDecode information for that company only.
                # A boolean mask (rather than a query string built from
                # the company name) also works for names with quotes.
                decode_df = self.drug_decode.df
                selection = ((decode_df["WEBRELEASE"] == 'Y') |
                             (decode_df["OWNED_BY"] == company))

                # Transform into a proper DrugDecode class for safety
                drug_decode_company = DrugDecode(decode_df[selection])
                allowed_drugs = drug_decode_company.df.index

                # Filter the results to keep only public drugs and that
                # company. Make sure this is integers
                results.df["DRUG_ID"] = results.df["DRUG_ID"].astype(int)
                results.df = results.df[
                    results.df["DRUG_ID"].isin(allowed_drugs)]

                # We read the IC50 again
                self.ic50 = self._read_ic50()

                # And create an ANOVA instance. This is not to do the
                # analysis again but to hold various information
                an = ANOVA(self.ic50, gf_filename, drug_decode_company,
                           verbose=False)

                # Keep only the IC50 columns of the selected drugs
                keep = [drug in allowed_drugs for drug in an.ic50.df.columns]
                an.ic50.df = an.ic50.df.loc[:, keep]

                an.settings = ANOVASettings(**self.settings)
                an.init()
                an.settings.directory = (self.company_directory + os.sep +
                                         company + os.sep + tcga)
                an.settings.analysis_type = tcga

                # Now we create the report
                self.report = ANOVAReport(an, results,
                                          drug_decode=drug_decode_company,
                                          verbose=self.verbose)
                self.report.company = company
                self.report.settings.analysis_type = tcga
                self.report.create_html_main(False)
                self.report.create_html_manova(False)
                self.report.create_html_features()
                self.report.create_html_drugs()
                self.report.create_html_associations()

    def _get_tcga(self):
        """Return the TCGA name of each genomic feature file."""
        return [self._tcga_name(x) for x in self.gf_filenames]
    tcga = property(_get_tcga)

    def _get_companies(self):
        """Return the DrugDecode companies, ``'Commercial'`` excluded."""
        return [x for x in self.drug_decode.companies if x != 'Commercial']
    companies = property(_get_companies)

    def create_summary_pages(self):
        """Create summary pages

        Once the main analysis is done (:meth:`analyse`), and the company
        packages have been created
        (:meth:`create_data_packages_for_companies`), you can run this
        method that will create a summary HTML page (index.html) for the
        tissue, and a similar summary HTML page for the tissues of each
        company. Finally, an HTML summary page for the companies is also
        created.

        The final directory tree looks like::

            |-- index.html
            |-- company_packages
            |   |-- index.html
            |   |-- Company1
            |   |   |-- Tissue1
            |   |   |-- Tissue2
            |   |   |-- index.html
            |   |-- Company2
            |   |   |-- Tissue1
            |   |   |-- Tissue2
            |   |   |-- index.html
            |-- tissue_packages
            |   |-- index.html
            |   |-- Tissue1
            |   |-- Tissue2

        """
        # First for the main directory (tissue_packages):
        print(purple("Creating summary index.html for the tissues"))
        self._create_summary_pages(self.main_directory, verbose=False)

        # Then for each companies:
        print(purple("Creating summary index.html for each company"))
        companies = self.companies
        pb = Progress(len(companies))
        for i, company in enumerate(companies):
            try:
                self._create_summary_pages(
                    self.company_directory + os.sep + company,
                    verbose=False, company=company)
            except Exception as err:
                # One failing company must not prevent the other pages
                print(red("Issue with %s. Continue with other companies"
                          % company))
                print(err)
            pb.animate(i + 1)

        # Finally, an index towards each company
        self._create_main_index()

    def _create_main_index(self):
        """Write the top-level ``index.html`` linking to each company."""
        # We could also add a column with number of association ?
        companies = self.companies[:]
        df = pd.DataFrame({"Company": companies})
        html_page = ReportMain(directory=".", filename='index.html',
                               template_filename='main_summary.html',
                               mode="summary")

        html_table = HTMLTable(df)
        # Links follow the configured company directory
        html_table.add_href('Company', newtab=True,
                            url=self.company_directory + "/",
                            suffix="/index.html")

        html_page.jinja['data_table'] = html_table.to_html(
            collapse_table=False)
        html_page.jinja['analysis_domain'] = "All companies / All "
        html_page.jinja['tissue_directory'] = self.main_directory
        html_page.write()

    def _create_summary_pages(self, main_directory, verbose=True,
                              company=None):
        """Create the summary ``index.html`` of the analyses in a directory.

        Each sub-directory of *main_directory* whose name is listed in
        :attr:`tcga` contributes one row: the total number of hits read
        from ``OUTPUT/drugs_summary.csv`` and the number of involved
        proprietary / public compounds, looked up in
        ``INPUT/DRUG_DECODE.csv``. When the summary CSV cannot be read,
        the row only holds the analysis name.

        :param main_directory: directory that contains one sub-directory
            per analysis.
        :param verbose: if True, print each directory that is processed.
        :param company: optional name stored as ``collaborator`` in the
            page template data.
        :return: the summary :class:`pandas.DataFrame`, sorted by
            decreasing number of hits.
        """
        columns = [
            'Analysis name', 'Number of hits',
            'Number of involved proprietary compounds', 'out of',
            'Number of involved public', 'out of'
        ]

        # tcga is a computed property: evaluate it once.
        known_tcga = self.tcga

        summary = []
        # Read all directories in main_directory
        for directory in sorted(glob.glob(main_directory + os.sep + '*')):
            tcga = directory.split(os.sep)[-1]
            if tcga not in known_tcga:
                continue
            if verbose:
                print(directory, tcga)

            # number of hits
            path = directory + os.sep + 'OUTPUT' + os.sep
            try:
                hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
            except Exception:
                # Best effort: keep a placeholder row for this analysis.
                summary.append([tcga] + [None] * 5)
                continue

            total_hits = hits.total.sum()
            drug_involved = hits['Unnamed: 0'].unique()

            # where to find the DRUG DECODE file. Should have been copied
            path = directory + os.sep + 'INPUT' + os.sep
            drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
            info = drug_decode.get_info()

            # reindex() is a label-based lookup that yields NaN rows for
            # unknown identifiers; such rows are counted as non public.
            # NOTE(review): assumes the DrugDecode index has no duplicates.
            webrelease = drug_decode.df.reindex(drug_involved).WEBRELEASE
            drug_inv_public = sum(webrelease == 'Y')
            drug_inv_prop = sum(webrelease != 'Y')

            summary.append([
                tcga, total_hits, drug_inv_prop, info['N_prop'],
                drug_inv_public, info['N_public']
            ])

        # Naming the columns at construction time also works when no
        # analysis directory was found (empty summary).
        df = pd.DataFrame(summary, columns=columns)

        try:
            df.sort_values(by="Number of hits", ascending=False,
                           inplace=True)
        except AttributeError:
            # Older pandas releases have no DataFrame.sort_values.
            df.sort("Number of hits", ascending=False, inplace=True)

        self.html_page = ReportMain(
            directory=main_directory, filename='index.html',
            template_filename='datapack_summary.html', mode="summary")

        # Let us use our HTMLTable to add the HTML references
        self.html_table = HTMLTable(df)
        self.html_table.add_href('Analysis name', newtab=True, url=None,
                                 suffix='/index.html')
        self.html_table.add_bgcolor('Number of hits')

        self.html_page.jinja['data_table'] = self.html_table.to_html(
            collapse_table=False)
        if company:
            self.html_page.jinja["collaborator"] = company
        self.html_page.write()

        return df
def _create_summary_pages(self, main_directory, verbose=True, company=None):
    """Create the summary ``index.html`` of the analyses in a directory.

    Each sub-directory of *main_directory* whose name is listed in
    ``self.tcga`` contributes one row: the total number of hits read from
    ``OUTPUT/drugs_summary.csv`` and the number of involved proprietary /
    public compounds, looked up in ``INPUT/DRUG_DECODE.csv``. When the
    summary CSV cannot be read, the row only holds the analysis name.

    :param main_directory: directory that contains one sub-directory per
        analysis.
    :param verbose: if True, print each directory that is processed.
    :param company: optional name stored as ``collaborator`` in the page
        template data.
    :return: the summary :class:`pandas.DataFrame`, sorted by decreasing
        number of hits.
    """
    columns = [
        'Analysis name', 'Number of hits',
        'Number of involved proprietary compounds', 'out of',
        'Number of involved public', 'out of'
    ]

    # self.tcga may be a computed property: evaluate it once.
    known_tcga = self.tcga

    summary = []
    # Read all directories in main_directory
    for directory in sorted(glob.glob(main_directory + os.sep + '*')):
        tcga = directory.split(os.sep)[-1]
        if tcga not in known_tcga:
            continue
        if verbose:
            print(directory, tcga)

        # number of hits
        path = directory + os.sep + 'OUTPUT' + os.sep
        try:
            hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
        except Exception:
            # Best effort: keep a placeholder row for this analysis.
            summary.append([tcga] + [None] * 5)
            continue

        total_hits = hits.total.sum()
        drug_involved = hits['Unnamed: 0'].unique()

        # where to find the DRUG DECODE file. Should have been copied
        path = directory + os.sep + 'INPUT' + os.sep
        drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
        info = drug_decode.get_info()

        # reindex() is a label-based lookup that yields NaN rows for
        # unknown identifiers; such rows are counted as non public.
        # NOTE(review): assumes the DrugDecode index has no duplicates.
        webrelease = drug_decode.df.reindex(drug_involved).WEBRELEASE
        drug_inv_public = sum(webrelease == 'Y')
        drug_inv_prop = sum(webrelease != 'Y')

        summary.append([
            tcga, total_hits, drug_inv_prop, info['N_prop'],
            drug_inv_public, info['N_public']
        ])

    # Naming the columns at construction time also works when no analysis
    # directory was found (empty summary).
    df = pd.DataFrame(summary, columns=columns)

    try:
        df.sort_values(by="Number of hits", ascending=False, inplace=True)
    except AttributeError:
        # Older pandas releases have no DataFrame.sort_values.
        df.sort("Number of hits", ascending=False, inplace=True)

    self.html_page = ReportMain(directory=main_directory,
                                filename='index.html',
                                template_filename='datapack_summary.html',
                                mode="summary")

    # Let us use our HTMLTable to add the HTML references
    self.html_table = HTMLTable(df)
    self.html_table.add_href('Analysis name', newtab=True, url=None,
                             suffix='/index.html')
    self.html_table.add_bgcolor('Number of hits')

    self.html_page.jinja['data_table'] = self.html_table.to_html(
        collapse_table=False)
    if company:
        self.html_page.jinja["collaborator"] = company
    self.html_page.write()

    return df
class GDSC(GDSCBase):
    """Wrapper of the :class:`~gdsctools.anova.ANOVA` class and reports to
    analyse all TCGA Tissues and companies automatically while creating
    summary HTML pages.

    First, one needs to provide a unique IC50 file.

    Second, the DrugDecode file (see :class:`~gdsctools.readers.DrugDecode`)
    must be provided with the DRUG identifiers and their corresponding
    names.

    Third, a set of genomic feature files must be provided for each
    :term:`TCGA` tissue.

    You then create a GDSC instance::

        from gdsctools import GDSC
        gg = GDSC('IC50_v18.csv', 'DRUG_DECODE.txt',
            genomic_feature_pattern='GF*csv')

    At that stage you may want to change the settings, e.g::

        gg.settings.FDR_threshold = 20

    Then run the analysis::

        gg.analyse()

    This will launch an ANOVA analysis for each TCGA tissue + PANCAN case
    if provided. This will also create a data package for each tissue.
    The data packages are stored in ./tissue_packages directory.

    Since all private and public drugs are stored together, the next
    step is to create data packages for each company::

        gg.create_data_packages_for_companies()

    you may select a specific one if you wish::

        gg.create_data_packages_for_companies(['AZ'])

    Finally, create some summary pages::

        gg.create_summary_pages()

    Your entry point is an HTML file called **index.html**

    """

    def __init__(self, ic50, drug_decode, genomic_feature_pattern="GF_*csv",
                 main_directory="tissue_packages", verbose=True):
        """.. rubric:: Constructor

        :param ic50: an :class:`~gdsctools.readers.IC50` file.
        :param drug_decode: an :class:`~gdsctools.readers.DrugDecode` file.
        :param genomic_feature_pattern: a glob to a set of
            :class:`~gdsctools.readers.GenomicFeature` files.
        :param main_directory: directory where the tissue data packages
            are written.
        :param verbose: forwarded to the base class constructor.
        """
        super(GDSC, self).__init__(genomic_feature_pattern, verbose=verbose)

        # The IC50 file is re-read for every analysis, so a filename (not
        # an already loaded object) is required here.
        assert isinstance(ic50, str)
        self.ic50_filename = ic50
        self.dd_filename = drug_decode
        self.main_directory = main_directory

        self.settings = ANOVASettings()
        self.settings.animate = False
        self.drug_decode = DrugDecode(drug_decode)

        print("Those settings will be used (check FDR_threshold)")
        print(self.settings)

        # ANOVA results of each analysis, keyed by TCGA name
        self.results = {}

        self.company_directory = "company_packages"

        # quick test on 15 features
        self.test = False

    @staticmethod
    def _tcga_name(gf_filename):
        """Return the TCGA label found in a genomic feature filename.

        The label is the second underscore-separated field of the name,
        cut at its first dot (``GF_BRCA.csv`` gives ``BRCA``).
        """
        return gf_filename.split("_")[1].split(".")[0]

    def _read_ic50(self, verbose=False):
        """Load the IC50 file, falling back on :class:`IC50Cluster`.

        :param verbose: if True, print a message when the fallback reader
            is used.
        """
        try:
            return IC50(self.ic50_filename)
        except Exception:
            if verbose:
                print("Clustering IC50 (v18 released data ?)")
            return IC50Cluster(self.ic50_filename, verbose=False)

    def analyse(self, multicore=None):
        """Launch ANOVA analysis and create a data package for each tissue.

        Reports are created but the HTML pages are not opened.

        :param multicore: number of cpu to use (1 by default)
        """
        self.mkdir(self.main_directory)

        # First analyse all TCGA cases + PANCAN once for all and
        # store all the results in a dictionary.
        self._analyse_all(multicore=multicore)

    def _analyse_all(self, multicore=None):
        """Run the ANOVA and build the HTML report of every feature file.

        Results are stored in ``self.results`` under the TCGA name.

        :param multicore: forwarded to ``ANOVA.anova_all``.
        """
        for gf_filename in sorted(self.gf_filenames):
            tcga = self._tcga_name(gf_filename)
            print(purple('======================== Analysing %s data' % tcga))

            self.mkdir(self.main_directory + os.sep + tcga)

            # Computes the ANOVA
            self.ic50 = self._read_ic50(verbose=True)
            an = ANOVA(self.ic50, gf_filename, self.drug_decode,
                       verbose=False)

            if self.test is True:
                an.features.df = an.features.df[an.features.df.columns[0:15]]

            self.an = an
            an.settings = ANOVASettings(**self.settings)
            an.settings.analysis_type = tcga
            an.init()  # This reset the directory

            results = an.anova_all(multicore=multicore)
            an.settings.directory = self.main_directory + os.sep + tcga

            # Store the results
            self.results[tcga] = results

            print('Analysing %s data and creating images' % tcga)
            self.report = ANOVAReport(an)
            self.report.settings.savefig = True
            self.report.create_html_pages(onweb=False)

    def create_data_packages_for_companies(self, companies=None):
        """Creates a data package for each company found in the DrugDecode
        file

        :param companies: a company name, a list of company names, or None
            to use all companies (see :attr:`companies`).
        :raises ValueError: if there is no company to process.
        """
        ##########################################################
        #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
        #                                                        #
        # DRUG_DECODE and IC50 inputs must be filtered to keep   #
        # only WEBRELEASE=Y and owner                            #
        #                                                        #
        #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
        ##########################################################

        # companies must be just one name (one string) or a list of strings
        # By default, takes all companies found in DrugDecode
        if isinstance(companies, str):
            companies = [companies]

        if companies is None:
            companies = self.companies

        if len(companies) == 0:
            raise ValueError("Could not find any companies in the DrugDecode file")

        # The main directory
        self.mkdir(self.company_directory)

        # Loop over all companies, retrieving information built
        # in analyse() method, selecting for each TCGA all information
        # for that company only (and public drugs)
        Ncomp = len(companies)
        for ii, company in enumerate(companies):
            print(purple("\n=========== Analysing company %s out of %s (%s)" %
                         (ii + 1, Ncomp, company)))
            self.mkdir(self.company_directory + os.sep + company)

            # Handle each TCGA case separately
            for gf_filename in sorted(self.gf_filenames):
                tcga = self._tcga_name(gf_filename)
                print(brown("  ------- building TCGA %s sub directory" % tcga))

                # Read the results previously computed, either from
                # memory or from the tissue data package
                try:
                    results_df = self.results[tcga].df.copy()
                except Exception:
                    results_path = "%s/%s/OUTPUT/results.csv" % (
                        self.main_directory, tcga)
                    results_df = ANOVAResults(results_path)

                # Make sure the results are formatted correctly
                results = ANOVAResults(results_df)

                # Get the DrugDecode information for that company only.
                # A boolean mask (rather than a query string built from
                # the company name) also works for names with quotes.
                decode_df = self.drug_decode.df
                selection = ((decode_df["WEBRELEASE"] == 'Y') |
                             (decode_df["OWNED_BY"] == company))

                # Transform into a proper DrugDecode class for safety
                drug_decode_company = DrugDecode(decode_df[selection])
                allowed_drugs = drug_decode_company.df.index

                # Filter the results to keep only public drugs and that
                # company. Make sure this is integers
                results.df["DRUG_ID"] = results.df["DRUG_ID"].astype(int)
                results.df = results.df[
                    results.df["DRUG_ID"].isin(allowed_drugs)]

                # We read the IC50 again
                self.ic50 = self._read_ic50()

                # And create an ANOVA instance. This is not to do the
                # analysis again but to hold various information
                an = ANOVA(self.ic50, gf_filename, drug_decode_company,
                           verbose=False)

                # Keep only the IC50 columns of the selected drugs
                keep = [drug in allowed_drugs for drug in an.ic50.df.columns]
                an.ic50.df = an.ic50.df.loc[:, keep]

                an.settings = ANOVASettings(**self.settings)
                an.init()
                an.settings.directory = (self.company_directory + os.sep +
                                         company + os.sep + tcga)
                an.settings.analysis_type = tcga

                # Now we create the report
                self.report = ANOVAReport(an, results,
                                          drug_decode=drug_decode_company,
                                          verbose=self.verbose)
                self.report.company = company
                self.report.settings.analysis_type = tcga
                self.report.create_html_main(False)
                self.report.create_html_manova(False)
                self.report.create_html_features()
                self.report.create_html_drugs()
                self.report.create_html_associations()

    def _get_tcga(self):
        """Return the TCGA name of each genomic feature file."""
        return [self._tcga_name(x) for x in self.gf_filenames]
    tcga = property(_get_tcga)

    def _get_companies(self):
        """Return the DrugDecode companies, ``'Commercial'`` excluded."""
        return [x for x in self.drug_decode.companies if x != 'Commercial']
    companies = property(_get_companies)

    def create_summary_pages(self):
        """Create summary pages

        Once the main analysis is done (:meth:`analyse`), and the company
        packages have been created
        (:meth:`create_data_packages_for_companies`), you can run this
        method that will create a summary HTML page (index.html) for the
        tissue, and a similar summary HTML page for the tissues of each
        company. Finally, an HTML summary page for the companies is also
        created.

        The final directory tree looks like::

            |-- index.html
            |-- company_packages
            |   |-- index.html
            |   |-- Company1
            |   |   |-- Tissue1
            |   |   |-- Tissue2
            |   |   |-- index.html
            |   |-- Company2
            |   |   |-- Tissue1
            |   |   |-- Tissue2
            |   |   |-- index.html
            |-- tissue_packages
            |   |-- index.html
            |   |-- Tissue1
            |   |-- Tissue2

        """
        # First for the main directory (tissue_packages):
        print(purple("Creating summary index.html for the tissues"))
        self._create_summary_pages(self.main_directory, verbose=False)

        # Then for each companies:
        print(purple("Creating summary index.html for each company"))
        companies = self.companies
        pb = Progress(len(companies))
        for i, company in enumerate(companies):
            try:
                self._create_summary_pages(
                    self.company_directory + os.sep + company,
                    verbose=False, company=company)
            except Exception as err:
                # One failing company must not prevent the other pages
                print(red("Issue with %s. Continue with other companies" % company))
                print(err)
            pb.animate(i + 1)

        # Finally, an index towards each company
        self._create_main_index()

    def _create_main_index(self):
        """Write the top-level ``index.html`` linking to each company."""
        # We could also add a column with number of association ?
        companies = self.companies[:]
        df = pd.DataFrame({"Company": companies})
        html_page = ReportMain(directory=".", filename='index.html',
                               template_filename='main_summary.html',
                               mode="summary")

        html_table = HTMLTable(df)
        # Links follow the configured company directory
        html_table.add_href('Company', newtab=True,
                            url=self.company_directory + "/",
                            suffix="/index.html")

        html_page.jinja['data_table'] = html_table.to_html(collapse_table=False)
        html_page.jinja['analysis_domain'] = "All companies / All "
        html_page.jinja['tissue_directory'] = self.main_directory
        html_page.write()

    def _create_summary_pages(self, main_directory, verbose=True,
                              company=None):
        """Create the summary ``index.html`` of the analyses in a directory.

        Each sub-directory of *main_directory* whose name is listed in
        :attr:`tcga` contributes one row: the total number of hits read
        from ``OUTPUT/drugs_summary.csv`` and the number of involved
        proprietary / public compounds, looked up in
        ``INPUT/DRUG_DECODE.csv``. When the summary CSV cannot be read,
        the row only holds the analysis name.

        :param main_directory: directory that contains one sub-directory
            per analysis.
        :param verbose: if True, print each directory that is processed.
        :param company: optional name stored as ``collaborator`` in the
            page template data.
        :return: the summary :class:`pandas.DataFrame`, sorted by
            decreasing number of hits.
        """
        columns = ['Analysis name', 'Number of hits',
                   'Number of involved proprietary compounds', 'out of',
                   'Number of involved public', 'out of']

        # tcga is a computed property: evaluate it once.
        known_tcga = self.tcga

        summary = []
        # Read all directories in main_directory
        for directory in sorted(glob.glob(main_directory + os.sep + '*')):
            tcga = directory.split(os.sep)[-1]
            if tcga not in known_tcga:
                continue
            if verbose:
                print(directory, tcga)

            # number of hits
            path = directory + os.sep + 'OUTPUT' + os.sep
            try:
                hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
            except Exception:
                # Best effort: keep a placeholder row for this analysis.
                summary.append([tcga] + [None] * 5)
                continue

            total_hits = hits.total.sum()
            drug_involved = hits['Unnamed: 0'].unique()

            # where to find the DRUG DECODE file. Should have been copied
            path = directory + os.sep + 'INPUT' + os.sep
            drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
            info = drug_decode.get_info()

            # reindex() is a label-based lookup that yields NaN rows for
            # unknown identifiers; such rows are counted as non public.
            # NOTE(review): assumes the DrugDecode index has no duplicates.
            webrelease = drug_decode.df.reindex(drug_involved).WEBRELEASE
            drug_inv_public = sum(webrelease == 'Y')
            drug_inv_prop = sum(webrelease != 'Y')

            summary.append([tcga, total_hits,
                            drug_inv_prop, info['N_prop'],
                            drug_inv_public, info['N_public']])

        # Naming the columns at construction time also works when no
        # analysis directory was found (empty summary).
        df = pd.DataFrame(summary, columns=columns)

        try:
            df.sort_values(by="Number of hits", ascending=False,
                           inplace=True)
        except AttributeError:
            # Older pandas releases have no DataFrame.sort_values.
            df.sort("Number of hits", ascending=False, inplace=True)

        self.html_page = ReportMain(
            directory=main_directory, filename='index.html',
            template_filename='datapack_summary.html', mode="summary")

        # Let us use our HTMLTable to add the HTML references
        self.html_table = HTMLTable(df)
        self.html_table.add_href('Analysis name', newtab=True, url=None,
                                 suffix='/index.html')
        self.html_table.add_bgcolor('Number of hits')

        self.html_page.jinja['data_table'] = self.html_table.to_html(
            collapse_table=False)
        if company:
            self.html_page.jinja["collaborator"] = company
        self.html_page.write()

        return df