def test_create_bc_report(self):
    """Create a demultiplex report for each test run and upload it to gdocs.

    Loads the post-process configuration from the test data directory, then
    for every run name in ``self.runname`` derives the flowcell name/date and
    asserts that the gdocs report creation succeeds.

    Raises:
        AssertionError: if report creation returns a falsy value.
    """
    # Parse the config
    config_file = os.path.join(self.data_dir, "post_process.yaml")
    self.config = load_config(config_file)

    # Loop over the runs
    for name in self.runname:
        # print() call form with a single argument behaves identically under
        # Python 2 and 3; used here for consistency with main() in this file.
        print("\nProcessing %s" % name)
        fc_name, fc_date = get_flowcell_info(name)
        analysisdir = os.path.join(self.workdir, name)
        # Both the work and flowcell dirs point at the same analysis dir
        # for this test fixture.
        assert create_report_on_gdocs(
            fc_date, fc_name, self.run_info_file,
            {"work": analysisdir, "flowcell": analysisdir},
            self.config), "Report creation failed"
def create_report_on_gdocs(fc_date, fc_name, run_info_yaml, dirs, config):
    """Thin wrapper delegating to ``sequencing_report.create_report_on_gdocs``.

    Naming the five parameters explicitly (instead of the original
    ``*args`` + list-unpacking) documents the expected call signature and
    makes an arity mismatch fail with a clear TypeError at the call site.
    Existing positional callers are unaffected.

    Args:
        fc_date: flowcell run date string.
        fc_name: flowcell id string.
        run_info_yaml: path to the run info YAML file.
        dirs: dict with "work" and "flowcell" directory paths.
        config: parsed post-process configuration.

    Returns:
        Whatever the underlying report creation returns (truthy on success).
    """
    return sequencing_report.create_report_on_gdocs(
        fc_date, fc_name, run_info_yaml, dirs, config)
def main(run_id, config_file, run_info_file=None, dryrun=False):
    """Locate run data and create a demultiplex report on Google Docs.

    Resolves the flowcell (archive) and work (analysis) directories from the
    configuration, finds or generates a run info YAML (converting a
    samplesheet if necessary), ensures barcode metric files exist (creating
    them from a CASAVA demultiplex report when missing), and finally uploads
    the report unless ``dryrun`` is set.

    Args:
        run_id: run/flowcell directory name; must be non-empty.
        config_file: path to the post-process YAML configuration.
        run_info_file: optional path to a run info YAML; auto-located or
            generated when None.
        dryrun: when True, print what would happen but upload nothing.

    Raises:
        AssertionError: on any missing prerequisite (config section,
            directories, samplesheet, metric files).
    """
    # --- Validate the basic inputs ---------------------------------------
    assert run_id, \
        "No run id was specified"
    assert os.path.exists(config_file), \
        "The configuration file, {}, could not be found".format(config_file)

    config = load_config(config_file)
    assert "gdocs_upload" in config, \
        "The configuration file, {}, has no section specifying the Google docs details".format(config_file)

    # --- Resolve the flowcell (archive) and work (analysis) directories --
    analysis_section = config.get("analysis", {})
    archive_dir = (os.path.join(analysis_section["store_dir"], run_id)
                   if "store_dir" in analysis_section
                   else os.getcwd())
    analysis_dir = (os.path.join(analysis_section["base_dir"], run_id)
                    if "base_dir" in analysis_section
                    else None)
    # Fall back to a scratch dir when no usable analysis dir exists.
    # NOTE(review): the mkdtemp directory is never removed — presumably
    # intentional so report inputs survive the run; confirm.
    if analysis_dir is None or not os.path.exists(analysis_dir):
        analysis_dir = tempfile.mkdtemp()

    dirs = {
        "work": os.path.normpath(analysis_dir),
        "flowcell": os.path.normpath(archive_dir),
    }
    assert os.path.exists(dirs["flowcell"]), \
        "The flowcell directory, {}, could not be found".format(dirs["flowcell"])
    assert os.path.exists(dirs["work"]), \
        "The work directory, {}, could not be found".format(dirs["work"])

    # --- Locate or generate the run info YAML ----------------------------
    if run_info_file is None:
        run_info_file = os.path.join(dirs["flowcell"], "run_info.yaml")
        if not os.path.exists(run_info_file):
            # Locate the samplesheet and convert to yaml
            samplesheet = _find_samplesheet(dirs["flowcell"])
            assert samplesheet, \
                "Could not locate samplesheet in {}, aborting..".format(dirs["flowcell"])
            handle, run_info_file = tempfile.mkstemp()
            os.close(handle)
            run_info_file = ssheet.csv2yaml(samplesheet, run_info_file)
    assert os.path.exists(run_info_file), \
        "The run info configuration file, {}, could not be found".format(run_info_file)

    fc_name, fc_date = fc.get_flowcell_info(dirs["flowcell"])

    # --- Ensure barcode metric files exist -------------------------------
    # If we have no bc_metrics files in the workdir, we may be looking at a
    # Casava run. In that case, attempt to parse the Demultiplex_Stats.htm
    # file and create bc_metrics files
    metric_files = (
        glob.glob(os.path.join(dirs["work"], "*_barcode", "*bc[_.]metrics"))
        + glob.glob(os.path.join(dirs["work"], "*bc[_.]metrics")))
    if not metric_files:
        casava_report = _find_casava_report(dirs["flowcell"])
        assert len(casava_report) > 0, \
            "Could not locate CASAVA demultiplex report in {}, aborting..".format(dirs["flowcell"])
        metric_files = _casava_report_to_metrics(run_info_file, casava_report, dirs)
    assert len(metric_files) > 0, \
        "Could not locate or create required metric files, aborting.."

    # --- Report / upload --------------------------------------------------
    print("A report will be created on Google Docs based on the demultiplexed data in {}".format(dirs["work"]))
    print("The configuration file is {0} and the run info file is {1}".format(config_file, run_info_file))
    print("The run was started on {0} and has flowcell id {1}".format(fc_date, fc_name))

    if dryrun:
        print("DRY-RUN: nothing uploaded")
    else:
        create_report_on_gdocs(fc_date, fc_name, run_info_file, dirs, config)
def main(run_id, config_file, run_info_file=None, dryrun=False):
    """Assemble run inputs and upload a demultiplex report to Google Docs.

    NOTE(review): this file defines ``main`` twice with identical logic;
    this second definition shadows the first at import time — confirm
    whether one copy should be removed.

    Args:
        run_id: run/flowcell directory name; must be non-empty.
        config_file: path to the post-process YAML configuration.
        run_info_file: optional run info YAML; located or generated if None.
        dryrun: when True, report what would be done without uploading.

    Raises:
        AssertionError: if any required file, directory, or config section
            is missing.
    """
    # Sanity-check the arguments and configuration up front.
    assert run_id, \
        "No run id was specified"
    assert os.path.exists(config_file), \
        "The configuration file, {}, could not be found".format(config_file)
    config = load_config(config_file)
    assert "gdocs_upload" in config, \
        "The configuration file, {}, has no section specifying the Google docs details".format(config_file)

    # Work out where the archived flowcell data and the analysis output live.
    opts = config.get("analysis", {})
    if "store_dir" not in opts:
        archive_dir = os.getcwd()
    else:
        archive_dir = os.path.join(opts["store_dir"], run_id)
    analysis_dir = None
    if "base_dir" in opts:
        analysis_dir = os.path.join(opts["base_dir"], run_id)
    if analysis_dir is None or not os.path.exists(analysis_dir):
        # No configured analysis dir available; use a scratch directory.
        analysis_dir = tempfile.mkdtemp()
    dirs = dict(work=os.path.normpath(analysis_dir),
                flowcell=os.path.normpath(archive_dir))
    for key, message in (("flowcell", "The flowcell directory, {}, could not be found"),
                         ("work", "The work directory, {}, could not be found")):
        assert os.path.exists(dirs[key]), message.format(dirs[key])

    # Fall back to the flowcell's run_info.yaml, generating one from the
    # samplesheet if it does not exist.
    if run_info_file is None:
        run_info_file = os.path.join(dirs["flowcell"], "run_info.yaml")
        if not os.path.exists(run_info_file):
            # Locate the samplesheet and convert to yaml
            samplesheet = _find_samplesheet(dirs["flowcell"])
            assert samplesheet, \
                "Could not locate samplesheet in {}, aborting..".format(dirs["flowcell"])
            fd, run_info_file = tempfile.mkstemp()
            os.close(fd)
            run_info_file = ssheet.csv2yaml(samplesheet, run_info_file)
    assert os.path.exists(run_info_file), \
        "The run info configuration file, {}, could not be found".format(run_info_file)

    fc_name, fc_date = fc.get_flowcell_info(dirs["flowcell"])

    # If we have no bc_metrics files in the workdir, we may be looking at a
    # Casava run. In that case, attempt to parse the Demultiplex_Stats.htm
    # file and create bc_metrics files
    patterns = [os.path.join(dirs["work"], "*_barcode", "*bc[_.]metrics"),
                os.path.join(dirs["work"], "*bc[_.]metrics")]
    metric_files = []
    for pattern in patterns:
        metric_files.extend(glob.glob(pattern))
    if len(metric_files) == 0:
        casava_report = _find_casava_report(dirs["flowcell"])
        assert len(casava_report) > 0, \
            "Could not locate CASAVA demultiplex report in {}, aborting..".format(dirs["flowcell"])
        metric_files = _casava_report_to_metrics(run_info_file, casava_report, dirs)
    assert len(metric_files) > 0, \
        "Could not locate or create required metric files, aborting.."

    # Summarize what will happen, then upload unless this is a dry run.
    print("A report will be created on Google Docs based on the demultiplexed data in {}".format(dirs["work"]))
    print("The configuration file is {0} and the run info file is {1}".format(config_file, run_info_file))
    print("The run was started on {0} and has flowcell id {1}".format(fc_date, fc_name))
    if not dryrun:
        create_report_on_gdocs(fc_date, fc_name, run_info_file, dirs, config)
    else:
        print("DRY-RUN: nothing uploaded")
def create_report_on_gdocs(fc_date, fc_name, run_info_yaml, dirs, config):
    """Thin wrapper delegating to ``sequencing_report.create_report_on_gdocs``.

    Spelling out the five parameters (rather than unpacking ``*args`` into a
    list) makes the expected call signature self-documenting and turns an
    arity mismatch into an immediate, clear TypeError. Positional callers
    keep working unchanged.

    Args:
        fc_date: flowcell run date string.
        fc_name: flowcell id string.
        run_info_yaml: path to the run info YAML file.
        dirs: dict with "work" and "flowcell" directory paths.
        config: parsed post-process configuration.

    Returns:
        The result of the underlying report creation (truthy on success).
    """
    return sequencing_report.create_report_on_gdocs(
        fc_date, fc_name, run_info_yaml, dirs, config)