def download(self, synid):
    """Download a file into the dreamtools directory.

    :param synid: a valid synapse id (e.g., syn123456)

    You must have a login on synapse website.

    The file is fetched with ``self.client.get`` and stored in
    ``self.directory``. The process is terminated with exit status 1 when
    the client reports no network connection, or when the client raises
    while fetching the file.
    """
    # If not connected, nothing will be possible, so give up right away.
    if self.client._connected is False:
        print(red("You do not seem to have a network connection. ") +
              red("Downloading required data is not possible"))
        sys.exit(1)

    assert synid.startswith('syn'), \
        "synid must be a valid synapse identifier e.g., syn123456"

    try:
        self.client.get(synid, downloadLocation=self.directory)
    except Exception as err:
        print('Original error message from synapseclient:')
        print(err)
        print("DREAMTools warning: this is most probably a file that requires you to accept the conditions of use of the data. We will open the relevant page for you now. Please click 'show' on the RHS of the Conditions of use and Accept the terms of use")
        # A failed download is an error: a bare sys.exit() would report
        # success (status 0) to the calling shell.
        sys.exit(1)
def download(self, synid):
    """Download a file into the dreamtools directory.

    :param synid: a valid synapse id (e.g., syn123456)

    You must have a login on synapse website.

    The file is fetched with ``self.client.get`` and stored in
    ``self.directory``. The process is terminated with exit status 1 when
    the client reports no network connection, or when the client raises
    while fetching the file.
    """
    # If not connected, nothing will be possible, so give up right away.
    if self.client._connected is False:
        print(red("You do not seem to have a network connection. ") +
              red("Downloading required data is not possible"))
        sys.exit(1)

    assert synid.startswith('syn'), \
        "synid must be a valid synapse identifier e.g., syn123456"

    try:
        self.client.get(synid, downloadLocation=self.directory)
    except Exception as err:
        print('Original error message from synapseclient:')
        print(err)
        print("DREAMTools warning: this is most probably a file that requires you to accept the conditions of use of the data. We will open the relevant page for you now. Please click 'show' on the RHS of the Conditions of use and Accept the terms of use")
        # A failed download is an error: a bare sys.exit() would report
        # success (status 0) to the calling shell.
        sys.exit(1)
def anova_one_drug_one_feature(anova, options):
    """Analyse one drug against one feature and optionally build its report.

    :param anova: ANOVA instance on which the analysis is run; its cancer
        type is set from ``options.tissue`` when a tissue is given.
    :param options: parsed user options; reads ``tissue``, ``drug``,
        ``feature``, ``no_html`` and ``onweb``.

    The resulting association is printed (transposed). An HTML report is
    created unless ``options.no_html`` is True.
    """
    from gdsctools import anova_report

    if options.tissue is not None:
        anova.set_cancer_type(options.tissue)

    odof = anova_report.Association(anova, drug=options.drug,
                                    feature=options.feature)
    df = odof.run()

    # DataFrame.ix no longer exists in pandas (removed in 1.0); .loc is the
    # label-based accessor for the row labelled 1.
    if df.loc[1]['FEATURE_IC50_effect_size'] is None:
        msg = "association %s vs %s not valid for testing (not enough" +\
              " MSI or positives for that features ? Try with " +\
              " --exclude-msi (you must then set a tissue with " +\
              " --tissue"
        print(red(msg % (options.drug, options.feature)))
    else:
        print(df.T)

    # HTML report
    if options.no_html is True:
        return
    odof.create_report(onweb=options.onweb)
def anova_one_drug_one_feature(anova, options):
    """Run the analysis of a single drug against a single feature.

    :param anova: ANOVA instance on which the analysis is run; its cancer
        type is set from ``options.tissue`` when a tissue is given.
    :param options: parsed user options; reads ``tissue``, ``directory``,
        ``drug`` (converted to int), ``feature``, ``no_html`` and ``onweb``.

    The resulting association is printed (transposed). An HTML report is
    created unless ``options.no_html`` is True.
    """
    from gdsctools import anova_report
    from gdsctools.report import ReportMain

    tissue = options.tissue
    if tissue is not None:
        anova.set_cancer_type(tissue)

    # Instantiated only to create the output directory
    ReportMain(directory=options.directory)

    association = anova_report.Association(
        anova, drug=int(options.drug), feature=options.feature)
    association.add_href = False
    df = association.run()

    effect_size = df.loc[1]['FEATURE_IC50_effect_size']
    if effect_size is not None:
        print(df.T)
    else:
        template = ("association %s vs %s not valid for testing (not enough"
                    " MSI or positives for that features ? Try with "
                    " --exclude-msi (you must then set a tissue with "
                    " --tissue")
        print(red(template % (options.drug, options.feature)))

    # HTML report
    if options.no_html is not True:
        association.create_report(onweb=options.onweb)
def standalone(args=None):
    """Entry point of the standalone application.

    Usage::

        cno_milp --help

    :param args: command-line arguments, program name first; a copy of
        ``sys.argv`` is used when None. With the program name only, the
        help is requested from the option parser.

    The model is trained and reported only when ``--on-web`` or
    ``--report`` was requested; otherwise a notice is printed.
    """
    argv = sys.argv[:] if args is None else args

    parser = OptionsMILP()
    if len(argv) == 1:
        parser.parse_args(["prog", "--help"])
        return

    options = parser.parse_args(argv[1:])

    if not (options.onweb is True or options.report is True):
        from easydev.console import red
        print(red("No report requested; nothing will be saved or shown"))
        print("use --on-web or --report options")
        return

    trainer = MILPTrain(options.pknmodel, options.data,
                        verbose=options.verbose)
    trainer.optimise()
    if options.onweb is True:
        trainer.onweb()
    else:
        trainer.report()
def anova_one_drug(anova, options):
    """Run the analysis for a single drug and optionally build the HTML pages.

    :param anova: ANOVA instance on which the analysis is run.
    :param options: parsed user options; reads ``tissue``, ``feature``,
        ``features``, ``drug``, ``no_html`` and ``onweb``.
    """
    from gdsctools import ANOVAReport

    anova.set_cancer_type(options.tissue)

    if options.feature:
        # NOTE(review): tests options.feature but reads options.features;
        # confirm against the option parser that both attributes exist.
        anova.feature_names = options.features

    results = anova.anova_one_drug(options.drug)
    n_assoc = len(results)

    print("\nFound %s associations" % n_assoc)
    if n_assoc == 0:
        print(red("\nPlease try with another drug or no --drug option"))
        return

    # ?? is this required ?
    results.df.insert(0, 'ASSOC_ID', range(1, n_assoc + 1))

    if options.no_html is True:
        return

    reporter = ANOVAReport(anova, results=results)
    target = reporter.settings.directory
    print(darkgreen("\nCreating all figure and html documents in %s" % target))
    reporter.create_html_pages(onweb=options.onweb)
def anova_one_drug(anova, options):
    """Analyse every feature for one drug, then create the HTML report.

    :param anova: ANOVA instance on which the analysis is run.
    :param options: parsed user options; reads ``tissue``, ``feature``,
        ``features``, ``drug``, ``no_html`` and ``onweb``.

    Nothing more is done when no association is found, and no HTML page is
    created when ``options.no_html`` is True.
    """
    from gdsctools import ANOVAReport

    anova.set_cancer_type(options.tissue)

    if options.feature:
        # NOTE(review): tests options.feature but reads options.features;
        # confirm against the option parser that both attributes exist.
        anova.feature_names = options.features

    results = anova.anova_one_drug(options.drug)
    found = len(results)
    print("\nFound %s associations" % found)

    if found == 0:
        print(red("\nPlease try with another drug or no --drug option"))
        return

    # ?? is this required ?
    identifiers = range(1, found + 1)
    results.df.insert(0, 'ASSOC_ID', identifiers)

    if options.no_html is not True:
        report = ANOVAReport(anova, results=results)
        print(darkgreen("\nCreating all figure and html documents in %s" %
                        report.settings.directory))
        report.create_html_pages(onweb=options.onweb)
def report(self):
    """Build the trainer report, showing it only when --on-web was given.

    With neither ``onweb`` nor ``report`` set in ``self.options``, no report
    is built and a notice is printed instead.
    """
    opts = self.options

    if opts.onweb is True:
        self.trainer.report(show=True)
        return

    if opts.report is True:
        self.trainer.report(show=False)
        return

    from easydev.console import red
    print(red("No report requested; nothing will be saved or shown"))
    print("use --on-web or --report options")
def create_summary_pages(self):
    """Create the summary HTML pages.

    Once the main analysis is done (:meth:`analyse`) and the company
    packages have been created
    (:meth:`create_data_packages_for_companies`), this method creates a
    summary HTML page (index.html) for the tissues, a similar summary page
    for the tissues of each company, and finally a summary page for the
    companies.

    The final directory tree looks like::

        |-- index.html
        |-- company_packages
        |   |-- index.html
        |   |-- Company1
        |   |   |-- Tissue1
        |   |   |-- Tissue2
        |   |   |-- index.html
        |   |-- Company2
        |   |   |-- Tissue1
        |   |   |-- Tissue2
        |   |   |-- index.html
        |-- tissue_packages
        |   |-- index.html
        |   |-- Tissue1
        |   |-- Tissue2

    A failure for one company is printed and does not stop the others.
    """
    # Main directory first (tissue_packages)
    print(purple("Creating summary index.html for the tissues"))
    self._create_summary_pages(self.main_directory, verbose=False)

    # One summary per company
    print(purple("Creating summary index.html for each company"))
    progress = Progress(len(self.companies))
    for done, company in enumerate(self.companies, start=1):
        try:
            company_path = self.company_directory + os.sep + company
            self._create_summary_pages(company_path, verbose=False,
                                       company=company)
        except Exception as err:
            print(red("Issue with %s. Continue with other companies" %
                      company))
            print(err)
        progress.animate(done)

    # Last, the index that points to each company
    self._create_main_index()
def create_summary_pages(self):
    """Create the summary HTML pages.

    Once the main analysis is done (:meth:`analyse`) and the company
    packages have been created
    (:meth:`create_data_packages_for_companies`), this method creates a
    summary HTML page (index.html) for the tissues, a similar summary page
    for the tissues of each company, and finally a summary page for the
    companies.

    The final directory tree looks like::

        |-- index.html
        |-- company_packages
        |   |-- index.html
        |   |-- Company1
        |   |   |-- Tissue1
        |   |   |-- Tissue2
        |   |   |-- index.html
        |   |-- Company2
        |   |   |-- Tissue1
        |   |   |-- Tissue2
        |   |   |-- index.html
        |-- tissue_packages
        |   |-- index.html
        |   |-- Tissue1
        |   |-- Tissue2

    A failure for one company is printed and does not stop the others.
    """
    # Main directory first (tissue_packages)
    print(purple("Creating summary index.html for the tissues"))
    self._create_summary_pages(self.main_directory, verbose=False)

    # One summary per company
    print(purple("Creating summary index.html for each company"))
    progress = Progress(len(self.companies))
    for done, company in enumerate(self.companies, start=1):
        try:
            company_path = self.company_directory + os.sep + company
            self._create_summary_pages(company_path, verbose=False,
                                       company=company)
        except Exception as err:
            print(red("Issue with %s. Continue with other companies" %
                      company))
            print(err)
        progress.animate(done)

    # Last, the index that points to each company
    self._create_main_index()
def sequana_init(options):
    """Initialise a pipeline working directory from the user options.

    In ``options.target_dir`` this copies the pipeline snakefile
    (``<pipeline>.rules``), writes a README, installs and updates
    ``config.yaml`` (plus the multiqc and cluster configuration files via
    ``copy_config_from_sequana``), and writes an executable ``runme.sh``.

    :param options: parsed command-line options (pipeline, target_dir,
        force, config, input_* settings, adapters, reference, cluster, ...).
    :raises IOError: if ``options.config`` is given but is not an existing
        path.
    :raises ValueError: if an item of ``options.config_params`` does not
        contain between one and three ``:`` separators.

    The process exits with status 0 if the target directory exists and the
    user answers ``n`` to the confirmation prompt.
    """
    import sequana
    from sequana.misc import textwrap
    from sequana import SequanaConfig, sequana_data

    sa = Tools(verbose=options.verbose)

    # Check that the pipeline is well defined
    module = Module(options.pipeline)

    if os.path.exists(options.target_dir):
        txt = "Will override the following files if present: %s.rules " +\
              "config.yaml, runme.sh, ..."
        sa.blue(txt % options.pipeline)
        if options.force is True:
            choice = "y"
        else:
            choice = input(red("Do you want to proceed (to avoid this " +
                               " message, use --force)? [y]/n:"))
        if choice == "n":
            sys.exit(0)

    # Copying snakefile
    logger.info("Copying snakefile")
    sa.mkdir(options.target_dir)
    shutil.copy(module.snakefile,
                options.target_dir + os.sep + options.pipeline + ".rules")

    # Creating README to print on the screen and in a file
    # NOTE(review): the line layout inside the two triple-quoted texts below
    # could not be recovered from the reviewed source; confirm it against
    # the file before applying.
    txt = "User command::\n\n"
    txt += " %s \n\n" % " ".join(sys.argv)
    txt += "You can now run snakemake yourself or type::"
    txt += purple(""" snakemake -s %s.rules --stats stats.txt -p -j 4 """ % options.pipeline)
    txt += """ # -j 4 means you will use 4 cores # -p prints the commands used # --stats stats.txt must be used since stats.txt is expected to be found. or just run the bash script:: sh runme.sh EDIT THE config.yaml if needed Once finished with success, the report/ directory contains a summary.html and relevant files (depends on the pipeline). """

    logger.info("Creating README")
    with open(options.target_dir + os.sep + "README", "w") as fh:
        # strip the color escape codes added by purple()
        fh.write(txt.replace("\x1b[35m", "").replace("\x1b[39;49;00m", ""))

    # Creating Config file
    logger.info("Creating the config file")

    # Create (if needed) and update the config file
    config_filename = options.target_dir + os.sep + "config.yaml"

    if options.config:
        # full existing path
        if os.path.exists(options.config):
            shutil.copy(options.config, config_filename)
        else:
            # or a sequana config file in the module path ?
            raise IOError("Config file %s not found locally" % options.config)
    else:
        copy_config_from_sequana(module, "config.yaml", config_filename)

    # Copy multiqc if it is available
    multiqc_filename = options.target_dir + os.sep + "multiqc_config.yaml"
    copy_config_from_sequana(module, "multiqc_config.yaml", multiqc_filename)
    cluster_cfg_filename = options.target_dir + os.sep + "cluster_config.json"
    copy_config_from_sequana(module, "cluster_config.json",
                             cluster_cfg_filename)

    # The input
    cfg = SequanaConfig(config_filename)
    cfg.config.input_directory = options.input_directory
    cfg.config.input_pattern = options.pattern
    cfg.config.input_extension = options.extension
    cfg.config.input_samples.file1 = options.file1
    cfg.config.input_samples.file2 = options.file2
    cfg.config.input_readtag = options.input_readtag

    # Dedicated section for quality control section
    if options.pipeline == "quality_control":
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)

        if options.kraken:
            cfg.config.kraken.database_directory = os.path.abspath(
                options.kraken)
            cfg.config.kraken.do = True
        else:
            cfg.config.kraken.do = False

        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_type = options.adapters

        # For all pipelines using BWA
        # NOTE(review): nesting of this statement under the quality_control
        # branch is inferred; confirm against the original indentation.
        if options.reference:
            cfg.config.bwa_mem.reference = os.path.abspath(options.reference)

    if options.pipeline == "variant_calling":
        if options.reference:
            cfg.config.bwa_mem_ref.reference = os.path.abspath(
                options.reference)

    if options.pipeline in ["rnaseq", "smallrnaseq"]:
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)
        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_choice = options.adapters

    cfg.copy_requirements(target=options.target_dir)

    # FIXME If invalid, no error raised
    if options.config_params:
        params = [this.strip() for this in options.config_params.split(",")]
        for param in params:
            fields = param.split(":")
            keys, value = fields[:-1], fields[-1]
            if not 1 <= len(keys) <= 3:
                raise ValueError(
                    "incorrect format following --config-params. "
                    "Expected at least one : sign and at most 3 of them. "
                    "Config file section such as :\n"
                    "project: tutorial\n"
                    "should be encoded project:tutorial")
            # walk down to the parent section, then set the last key
            section = cfg.config
            for key in keys[:-1]:
                section = section[key]
            section[keys[-1]] = value

    # important to update yaml with content of config
    cfg._update_yaml()
    cfg.save(config_filename)

    # Creating a unique runme.sh file
    runme_filename = options.target_dir + os.sep + "runme.sh"
    cmd = "#!/bin/sh\n"
    cmd += "# generated with sequana version %s with this command:\n" % sequana.version
    cmd += "# %s\n" % " ".join(sys.argv)
    # Fill in the values right here: a second %-formatting pass over the
    # whole script would fail on any '%' found in sys.argv or --cluster.
    cmd += "snakemake -s %s.rules --stats stats.txt -p -j %s --nolock" % (
        options.pipeline, options.jobs)

    if options.forceall:
        cmd += " --forceall "

    if options.cluster:
        # Do we want to include the cluster config option ?
        cluster_config = Module(options.pipeline).cluster_config
        if options.ignore_cluster_config is True:
            cluster_config = None

        if cluster_config is None:
            cmd += ' --cluster "%s"' % options.cluster
        else:
            cmd += ' --cluster "%s" --cluster-config %s' %\
                (options.cluster, os.path.basename(cluster_config))

    if options.redirection:
        cmd += " 1>run.out 2>run.err"

    with open(runme_filename, "w") as fout:
        fout.write(cmd)

    # change permission of runme.sh to 755
    st = os.stat(runme_filename)
    os.chmod(runme_filename, st.st_mode | 0o755)

    sa.green("Initialisation of %s succeeded" % options.target_dir)
    sa.green("Please, go to the project directory ")
    sa.purple("\n cd %s\n" % options.target_dir)
    sa.green("Check out the README and config.yaml files")
    sa.green("A basic script to run the analysis is named runme.sh ")
    sa.purple("\n sh runme.sh\n")
    sa.purple("On a slurm cluster, you may type:")
    sa.purple("\n srun --qos normal runme.sh\n")
    sa.green("In case of trouble, please post an issue on https://github.com/sequana/sequana/issue ")
    sa.green("or type sequana --issue and fill a post with the error and the config file (NO DATA PLEASE)")

    # Change permission
    try:
        os.chmod(runme_filename, 0o755)
    except OSError:
        # only a failed chmod is tolerated here; anything else must surface
        logger.info(
            "Please use Python3. Change the mode of %s manually to 755" %
            runme_filename)
def red(self, txt, force=False):
    """Print *txt* through the module-level ``red`` helper.

    Nothing is printed unless ``self.verbose`` is truthy or *force* is
    exactly ``True``.
    """
    wanted = bool(self.verbose) or force is True
    if not wanted:
        return
    print(red(txt))
def run(self, color=True):
    """Executes 'python setup.py' with the user commands on all packages.

    :param color: when True, try to use the colored console helpers; plain
        text is used when False or when the helpers cannot be imported.

    Each directory of ``self.packages`` is entered in turn and every
    command of ``self.commands`` is run there through the shell. Output is
    captured unless ``self.verbose`` is set. When a command fails and
    ``self.force`` is not set, the process exits. The working directory is
    restored to ``self.curdir`` at the end.
    """
    use_color = color
    if use_color:
        try:
            from easydev.console import bold, red, green, \
                color_terminal, nocolor, underline, purple
        except ImportError:
            try:
                sys.path.insert(0, os.path.join('deploy', 'src', 'deploy'))
                from console import bold, red, green, \
                    color_terminal, nocolor, underline, purple
            except ImportError:
                # No console helpers at all: fall back to plain text rather
                # than failing later on an unbound color_terminal.
                use_color = False

    if use_color:
        if not color_terminal():
            # Windows' poor cmd box doesn't understand ANSI sequences
            nocolor()
    else:
        bold = purple = red = green = underline = str

    print(bold("Running multisetup version %s" % __revision__.split()[2]))

    directories = list(self.packages)

    print('Will process the following directories: ', )
    for directory in directories:
        print(bold(directory))
    print('')

    try:
        for directory in directories:
            try:
                os.chdir(directory)
                print(underline('Entering %s package' %
                                os.path.basename(directory)))
            except OSError as err:
                print(underline('Entering %s package' %
                                os.path.basename(directory)))
                print(red("cannot find this directory (%s)" %
                          os.path.basename(directory)))
                print(err)
                # Skip this package: running its commands from whatever
                # directory we happen to be in would target the wrong
                # setup.py.
                continue

            print('Python exec : ', sys.executable)

            for cmd in self.commands:
                setup_command = '%s setup.py %s ' % (sys.executable, cmd)
                print("\tExecuting " + setup_command + '...processing', )

                # Run setup.py with user commands
                outputs = None
                errors = None
                if self.verbose:
                    process = Popen(setup_command, shell=True)
                    process.wait()
                else:
                    # Text mode: the captured streams are split on '\n'
                    # below, which fails on the bytes returned by default
                    # under Python 3.
                    process = Popen(setup_command, stdout=PIPE, stderr=PIPE,
                                    shell=True, universal_newlines=True)
                    outputs, errors = process.communicate()

                if process.returncode == 0:
                    print(green('done'))
                else:
                    # NOTE(review): nesting of the chdir below is inferred;
                    # confirm against the original indentation.
                    if not self.verbose:
                        print(red('\tFailed. ( error code %s) ' %
                                  (process.returncode)))
                        os.chdir(self.curdir)

                    if not self.force:
                        raise RuntimeError()

                if 'pylint' in cmd:
                    if outputs is not None:
                        for x in outputs.split('\n'):
                            if x.startswith('Your code has been'):
                                print(purple('\t%s' % x))

                if 'nosetests' in cmd:
                    # NOTE(review): the else branch is assumed to pair with
                    # the "errors is not None" test; confirm.
                    if errors is not None:
                        for x in errors.split('\n'):
                            if x.startswith('TOTAL'):
                                res = x.replace('TOTAL', 'Total coverage')
                                res = " ".join(res.split())
                                print(purple('\t%s' % res))
                            if x.startswith('Ran'):
                                print(purple('\t%s' % x))
                            if x.startswith('FAILED'):
                                print(purple('\t%s' % x))
                    else:
                        print(purple('all right'))

            os.chdir(self.curdir)

    except RuntimeError:
        sys.exit()

    os.chdir(self.curdir)
def anova_pipeline(args=None):
    """This function is used by the standalone application called
    **gdsctools_anova**

    Type::

        gdsctools_anova --help

    to get some help.

    :param args: command-line arguments, program name first. When None, a
        copy of ``sys.argv`` is used. When only the program name is given,
        ``--help`` is appended; the caller's list is never modified.

    Informational options (version, testing, save-settings, license,
    summary, print-*) are handled first and return immediately. Otherwise
    the analysis is dispatched according to the ``--drug`` and ``--feature``
    options. The process exits with status 1 when the requested drug is not
    a column of the IC50 data.
    """
    msg = "Welcome to GDSCTools standalone"
    print_color(msg, purple, underline=True)

    # Keep the argument args as None by default to
    # allow testing e.g., in nosetests
    if args is None:
        args = sys.argv[:]
    elif len(args) == 1:
        # build a new list: += would extend the list owned by the caller
        args = args + ['--help']

    user_options = ANOVAOptions(prog="gdsctools_anova")
    try:
        options = user_options.parse_args(args[1:])
    except SystemExit:
        return

    # -----------------------------------------------------------------
    # ---------------------------------------- options without analysis
    # -----------------------------------------------------------------

    if options.version is True:
        print("This is version %s of gdsctools_anova" % gdsctools.version)
        return

    if options.testing is True:
        print('Testing mode:')
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        df = an.anova_one_drug_one_feature('Drug_1047_IC50', 'TP53_mut')

        assert df.loc[1, 'N_FEATURE_pos'] == 554, \
            "N_feature_pos must be equal to 554"
        print(df.T)
        print(darkgreen("\nGDSCTools seems to be installed properly"))
        return

    if options.save_settings:
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        an.settings.to_json(options.save_settings)
        print('Save a default parameter set in %s' % options.save_settings)
        return

    if options.license is True:
        print(gdsctools.license)
        return

    if options.summary is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)
        print(an)
        return

    if options.print_tissues is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)

        tissues = an.tissue_factor
        try:
            tissues = tissues.sort_values('Tissue Factor').unique()
        except Exception:
            # fallback kept from the original code (older sorting API);
            # no longer swallows KeyboardInterrupt/SystemExit
            tissues = tissues.sort(inplace=False).unique()
        for name in tissues:
            print(name)
        return

    if options.print_drugs is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        # str() keeps the join working if the identifiers are not strings
        drug_ids = " , ".join(str(drug) for drug in gdsc.drugIds)
        print("\n".join(textwrap.wrap(drug_ids)))
        return

    if options.print_features is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        print("\n".join(textwrap.wrap(" , ".join(gdsc.feature_names))))
        return

    # -----------------------------------------------------------------
    # --------------------------------------------------- real analysis
    # -----------------------------------------------------------------
    # dispatcher to the functions according to the user parameters
    from gdsctools import ANOVA, ANOVAReport
    anova = ANOVA(options.input_ic50, options.input_features,
                  options.input_drug, low_memory=not options.fast)
    anova = _set_settings(anova, options)

    if options.drug and options.drug not in anova.ic50.df.columns:
        print(red("Invalid Drug. Try --print-drug-names"))
        sys.exit(1)

    if options.drug is not None and options.feature is not None:
        print_color("ODOF mode", purple)
        anova_one_drug_one_feature(anova, options)
    elif options.drug is not None:
        print_color("ODAF mode", purple)
        anova_one_drug(anova, options)
    else:
        # analyse everything
        if options.feature is None:
            print_color("ADAF mode", purple)
        else:
            print_color("ADOF mode", purple)
        anova_all(anova, options)

    if options.onweb is False and options.no_html is False:
        msg = "\nNote that a directory {} was created and files saved into it"
        print(purple(msg.format(options.directory)))

    return
def run(self, color=True):
    """Executes 'python setup.py' with the user commands on all packages.

    :param color: when True, try to use the colored console helpers; plain
        text is used when False or when the helpers cannot be imported.

    Each directory of ``self.packages`` is entered in turn and every
    command of ``self.commands`` is run there through the shell. Output is
    captured unless ``self.verbose`` is set. When a command fails and
    ``self.force`` is not set, the process exits. The working directory is
    restored to ``self.curdir`` at the end.
    """
    use_color = color
    if use_color:
        try:
            from easydev.console import bold, red, green, \
                color_terminal, nocolor, underline, purple
        except ImportError:
            try:
                sys.path.insert(0, os.path.join('deploy', 'src', 'deploy'))
                from console import bold, red, green, \
                    color_terminal, nocolor, underline, purple
            except ImportError:
                # No console helpers at all: fall back to plain text rather
                # than failing later on an unbound color_terminal.
                use_color = False

    if use_color:
        if not color_terminal():
            # Windows' poor cmd box doesn't understand ANSI sequences
            nocolor()
    else:
        bold = purple = red = green = underline = str

    print(bold("Running multisetup version %s" % __revision__.split()[2]))

    directories = list(self.packages)

    print('Will process the following directories: ', )
    for directory in directories:
        print(bold(directory))
    print('')

    try:
        for directory in directories:
            try:
                os.chdir(directory)
                print(underline('Entering %s package' %
                                os.path.basename(directory)))
            except OSError as err:
                print(underline('Entering %s package' %
                                os.path.basename(directory)))
                print(red("cannot find this directory (%s)" %
                          os.path.basename(directory)))
                print(err)
                # Skip this package: running its commands from whatever
                # directory we happen to be in would target the wrong
                # setup.py.
                continue

            print('Python exec : ', sys.executable)

            for cmd in self.commands:
                setup_command = '%s setup.py %s ' % (sys.executable, cmd)
                print("\tExecuting " + setup_command + '...processing', )

                # Run setup.py with user commands
                outputs = None
                errors = None
                if self.verbose:
                    process = Popen(setup_command, shell=True)
                    process.wait()
                else:
                    # Text mode: the captured streams are split on '\n'
                    # below, which fails on the bytes returned by default
                    # under Python 3.
                    process = Popen(setup_command, stdout=PIPE, stderr=PIPE,
                                    shell=True, universal_newlines=True)
                    outputs, errors = process.communicate()

                if process.returncode == 0:
                    print(green('done'))
                else:
                    # NOTE(review): nesting of the chdir below is inferred;
                    # confirm against the original indentation.
                    if not self.verbose:
                        print(red('\tFailed. ( error code %s) ' %
                                  (process.returncode)))
                        os.chdir(self.curdir)

                    if not self.force:
                        raise RuntimeError()

                if 'pylint' in cmd:
                    if outputs is not None:
                        for x in outputs.split('\n'):
                            if x.startswith('Your code has been'):
                                print(purple('\t%s' % x))

                if 'nosetests' in cmd:
                    # NOTE(review): the else branch is assumed to pair with
                    # the "errors is not None" test; confirm.
                    if errors is not None:
                        for x in errors.split('\n'):
                            if x.startswith('TOTAL'):
                                res = x.replace('TOTAL', 'Total coverage')
                                res = " ".join(res.split())
                                print(purple('\t%s' % res))
                            if x.startswith('Ran'):
                                print(purple('\t%s' % x))
                            if x.startswith('FAILED'):
                                print(purple('\t%s' % x))
                    else:
                        print(purple('all right'))

            os.chdir(self.curdir)

    except RuntimeError:
        sys.exit()

    os.chdir(self.curdir)