def main(config_file, fc_dir, run_info_yaml=None):
    """Parse the YAML configuration and run the pipeline with logging bound.

    The file at ``config_file`` is parsed with ``yaml.load``, a log handler
    is built from the result with ``create_log_handler``, and ``run_main``
    is invoked inside the handler's ``applicationbound()`` context.
    """
    # NOTE(review): yaml.load can construct arbitrary Python objects; if the
    # configuration file may come from an untrusted source, consider
    # yaml.safe_load instead.
    with open(config_file) as config_handle:
        settings = yaml.load(config_handle)
    handler = create_log_handler(settings, LOG_NAME)
    with handler.applicationbound():
        run_main(settings, config_file, fc_dir, run_info_yaml)
def main(config_file, fc_dir, project_dir, run_info_yaml=None, fc_alias=None,
         project_desc=None, lanes=None):
    """Set up the delivery directories for a project and run the analysis.

    At least one of ``project_desc`` and ``lanes`` must be given; they are
    passed to ``prune_run_info_by_description`` to select the lanes to
    process. The process exits with status 1 when neither is supplied or
    when no lane matches.

    The log directory is set to ``<project_dir>/log`` before the log handler
    is created. ``fc_alias`` defaults to ``"<fc_date>_<fc_name>"`` when it is
    empty or None.

    NOTE(review): ``options`` is not defined in this function; it is
    presumably a module-level object holding parsed command line options
    (``data_prefix``, ``only_run_info``) -- confirm against the caller.
    """
    if project_desc is None and lanes is None:
        log.error("No project description or lanes provided: cannot deliver files without this information")
        # Exit with a non-zero status so that callers can detect the failure.
        sys.exit(1)

    config = load_config(config_file)
    # Set log file in project output directory
    config.update(log_dir=os.path.join(project_dir, "log"))
    log_handler = create_log_handler(config, log.name)

    fc_dir = os.path.normpath(fc_dir)
    # Only the run information is needed here; the flowcell name and date are
    # recomputed from the pruned run information further down.
    _, _, run_info = get_run_info(fc_dir, config, run_info_yaml)
    with log_handler.applicationbound():
        run_info = prune_run_info_by_description(run_info['details'], project_desc, lanes)
        if not run_info:
            # Logged while the handler is bound so the error reaches the
            # configured log destination.
            log.error("No lanes found with matching description %s: please check your flowcell run information" % project_desc)
            sys.exit(1)

    dirs = dict(fc_dir=fc_dir, project_dir=project_dir)
    fc_name, fc_date = get_flowcell_id(run_info, dirs['fc_dir'])
    config.update(fc_name=fc_name, fc_date=fc_date)
    config.update(fc_alias="%s_%s" % (fc_date, fc_name) if not fc_alias else fc_alias)
    dirs.update(fc_delivery_dir=os.path.join(dirs['project_dir'],
                                             options.data_prefix,
                                             config['fc_alias']))
    dirs.update(data_delivery_dir=os.path.join(dirs['project_dir'],
                                               options.data_prefix,
                                               "%s_%s" % (fc_date, fc_name)))
    with log_handler.applicationbound():
        config = _make_delivery_directory(dirs, config)
        _save_run_info(run_info, dirs['fc_delivery_dir'], run_exit=options.only_run_info)
        run_main(run_info, config, dirs)
def main(local_config, post_config_file=None, process_msg=True, store_msg=True,
         qseq=True, fastq=True):
    """Load the local configuration and search for new output with logging bound.

    All arguments other than the loaded configuration are forwarded unchanged
    to ``search_for_new``, which runs inside the ``applicationbound()``
    context of the handler returned by ``create_log_handler``.
    """
    settings = load_config(local_config)
    handler = create_log_handler(settings)
    with handler.applicationbound():
        search_for_new(settings, local_config, post_config_file,
                       process_msg, store_msg, qseq, fastq)
def main(local_config, post_config_file=None, fetch_msg=True, process_msg=True,
         store_msg=True, backup_msg=False, qseq=True, fastq=True,
         remove_qseq=False, compress_fastq=False, casava=False):
    """Load the local configuration and search for new output with logging bound.

    The loaded configuration, the configuration paths and every flag are
    passed positionally to ``search_for_new`` inside the handler's
    ``applicationbound()`` context. The handler is created with
    ``create_log_handler(config, True)``.
    """
    settings = load_config(local_config)
    handler = create_log_handler(settings, True)
    with handler.applicationbound():
        search_for_new(
            settings, local_config, post_config_file, fetch_msg,
            process_msg, store_msg, backup_msg, qseq, fastq,
            remove_qseq, compress_fastq, casava)
def main(galaxy_config, processing_config):
    """Read messages tagged with the configured store tag, with logging bound.

    The AMQP settings come from ``read_galaxy_amqp_config(galaxy_config)``
    and the processing settings from ``load_config(processing_config)``. A
    single ``(tag, handler)`` pair, keyed on the ``msg_store_tag`` entry of
    the processing configuration, is handed to ``message_reader``.
    """
    amqp_settings = read_galaxy_amqp_config(galaxy_config)
    settings = load_config(processing_config)
    tag = settings["msg_store_tag"]
    log_handler = create_log_handler(settings, LOG_NAME)
    routes = [(tag, store_handler(settings, tag))]
    with log_handler.applicationbound():
        message_reader(routes, amqp_settings)
def main(galaxy_config, local_config, process_msg=True, store_msg=True,
         qseq=True, fastq=True):
    """Parse the local YAML configuration and search for new output.

    AMQP settings are read with ``_read_amqp_config(galaxy_config)``. The
    local configuration is parsed with ``yaml.load`` and, together with the
    AMQP settings and the flags, passed to ``search_for_new`` inside the
    handler's ``applicationbound()`` context.
    """
    amqp_settings = _read_amqp_config(galaxy_config)
    # NOTE(review): yaml.load can construct arbitrary Python objects; if the
    # configuration file may come from an untrusted source, consider
    # yaml.safe_load instead.
    with open(local_config) as config_handle:
        settings = yaml.load(config_handle)
    handler = create_log_handler(settings, LOG_NAME)
    with handler.applicationbound():
        search_for_new(settings, amqp_settings, process_msg, store_msg,
                       qseq, fastq)
def main(local_config, post_config_file=None, process_msg=True, store_msg=True,
         qseq=True, fastq=True):
    """Load the local configuration and search for new output with logging bound.

    The log handler is created with ``create_log_handler(config, LOG_NAME)``;
    ``search_for_new`` receives the loaded configuration followed by all of
    this function's arguments, unchanged.
    """
    settings = load_config(local_config)
    handler = create_log_handler(settings, LOG_NAME)
    with handler.applicationbound():
        search_for_new(settings, local_config, post_config_file,
                       process_msg, store_msg, qseq, fastq)
def main(*args, **kwargs):
    """Load the local configuration and search for new output, thread-bound.

    The first positional argument is the local configuration path and is
    required (an IndexError is raised when it is missing). The optional
    second positional argument is stored in ``kwargs`` under
    ``"post_process_config"`` (None when absent), replacing any value passed
    under that keyword. All keyword arguments are forwarded to
    ``search_for_new``.
    """
    local_config = args[0]
    if len(args) > 1:
        kwargs["post_process_config"] = args[1]
    else:
        kwargs["post_process_config"] = None

    settings = load_config(local_config)
    handler = create_log_handler(settings, True)
    # This entry point binds the handler to the current thread only.
    with handler.threadbound():
        search_for_new(settings, local_config, **kwargs)
def main(galaxy_config, processing_config):
    """Read messages tagged with the configured store tag, with logging bound.

    AMQP settings are read with ``_read_amqp_config(galaxy_config)`` and the
    processing configuration is parsed with ``yaml.load``. A single
    ``(tag, handler)`` pair, keyed on the ``msg_store_tag`` entry of the
    processing configuration, is handed to ``message_reader``.
    """
    amqp_settings = _read_amqp_config(galaxy_config)
    # NOTE(review): yaml.load can construct arbitrary Python objects; if the
    # configuration file may come from an untrusted source, consider
    # yaml.safe_load instead.
    with open(processing_config) as config_handle:
        settings = yaml.load(config_handle)
    tag = settings["msg_store_tag"]
    log_handler = create_log_handler(settings, LOG_NAME)
    routes = [(tag, store_handler(settings, tag))]
    with log_handler.applicationbound():
        message_reader(routes, amqp_settings)
def long_term_storage(remote_info, config_file):
    """Copy run data described by ``remote_info`` to remote storage.

    The configuration is loaded from ``config_file``; the target host
    (``store_host``) and the contents of ``remote_info`` are logged before
    ``_copy_for_storage`` is called, all inside the handler's
    ``applicationbound()`` context.
    """
    settings = load_config(config_file)
    handler = create_log_handler(settings, log.name)
    with handler.applicationbound():
        host_message = "Copying run data over to remote storage: %s" % settings["store_host"]
        log.info(host_message)
        contents_message = "The contents from AMQP for this dataset are:\n %s" % remote_info
        log.debug(contents_message)
        _copy_for_storage(remote_info, settings)
def analyze_and_upload(remote_info, config_file):
    """Main entry point for analysis and upload to Galaxy.

    Runs three steps in order inside the log handler's ``applicationbound()``
    context: ``_copy_from_sequencer``, ``_run_analysis`` and
    ``_upload_to_galaxy``. The directories returned by the first two steps
    are passed on to the later ones.
    """
    settings = load_config(config_file)
    handler = create_log_handler(settings, log.name)
    with handler.applicationbound():
        flowcell_dir = _copy_from_sequencer(remote_info, settings)
        results_dir = _run_analysis(flowcell_dir, remote_info, settings,
                                    config_file)
        _upload_to_galaxy(flowcell_dir, results_dir, remote_info, settings,
                          config_file)
def main(config_file, fc_dir, run_info_yaml=None):
    """Load the configuration, set up logging and run the pipeline.

    When the configuration has no ``log_dir`` (missing or None), it defaults
    to ``<current working directory>/log``. Every log record emitted while
    ``run_main`` executes is annotated with the command line (``sys.argv``)
    and the pipeline version.
    """
    config = load_config(config_file)
    work_dir = os.getcwd()
    if config.get("log_dir") is None:
        config["log_dir"] = os.path.join(work_dir, "log")

    def insert_command(record):
        # Attach invocation details to each log record.
        record.extra["command"] = sys.argv
        record.extra["version"] = version.get_pipeline_version()

    setup_logging(config)
    log_handler = create_log_handler(config)
    with log_handler, logbook.Processor(insert_command):
        run_main(config, config_file, fc_dir, work_dir, run_info_yaml)
def main(config_file, fc_dir, run_info_yaml=None):
    """Load the configuration, set up logging and run the pipeline.

    When the configuration has no ``log_dir`` (missing or None), it defaults
    to ``<current working directory>/log``. Every log record emitted while
    ``run_main`` executes is annotated with the command line (``sys.argv``)
    and the pipeline version.
    """
    config = load_config(config_file)
    work_dir = os.getcwd()
    if config.get("log_dir") is None:
        config["log_dir"] = os.path.join(work_dir, "log")

    def insert_command(record):
        # Attach invocation details to each log record.
        record.extra["command"] = sys.argv
        record.extra["version"] = version.get_pipeline_version()

    setup_logging(config)
    log_handler = create_log_handler(config)
    with log_handler, logbook.Processor(insert_command):
        run_main(config, config_file, fc_dir, work_dir, run_info_yaml)
def report_to_statusdb(fc_name, fc_date, run_info_yaml, dirs, config):
    """Create statusdb report on a couchdb server.

    A FlowcellQCMetrics object holds information about a flowcell. QC
    results are stored at the flowcell level and sample level depending on
    analysis. Lane level QC data are stored in the FlowcellQCMetrics object.

    The ``statusdb`` section of ``config`` must provide a ``url``; without
    either, nothing is written and False is returned. ``dirs`` is queried
    for its ``work`` and ``flowcell`` entries.

    Returns True when every sample object and the flowcell object were
    passed to ``_save_obj`` without an exception, False otherwise.
    """
    success = True
    try:
        statusdb_config = config.get("statusdb", None)
        if statusdb_config is None:
            log.info("Could not find statusdb section in configuration. No statusdb reporting will be done")
            return False

        statusdb_url = statusdb_config.get("url", None)
        if statusdb_url is None:
            log.warn("No url field found in statusdb configuration section.")
            return False

        # Add email notification
        email = statusdb_config.get("statusdb_email_notification", None)
        smtp_host = config.get("smtp_host", "")
        smtp_port = config.get("smtp_port", "")
        log_handler = create_log_handler({'email': email,
                                          'smtp_host': smtp_host,
                                          'smtp_port': smtp_port}, True)

        def tag_run(record):
            # Store the run identifier in the record's extra data.
            record.extra['run'] = "%s_%s" % (fc_date, fc_name)

        with log_handler.applicationbound(), logbook.Processor(tag_run):
            log.info("Started creating QC Metrics report on statusdb for %s_%s on %s"
                     % (fc_date, fc_name, datetime.now().isoformat()))

            # Create object and parse all available metrics; no checking
            # is currently done for missing files
            try:
                qc_obj = FlowcellQCMetrics(fc_date, fc_name, run_info_yaml,
                                           dirs.get("work", None),
                                           dirs.get("flowcell", None))
            except Exception as e:
                # Report why the object could not be built instead of
                # discarding the error.
                log.warn("Creating FlowcellQCMetrics object failed: %s" % e)
                qc_obj = None

            if qc_obj is None:
                log.warn("Couldn't populate FlowcellQCMetrics object. No QC data written to statusdb for %s_%s"
                         % (fc_date, fc_name))
                success = False
            else:
                try:
                    # Save data at a sample level
                    log.info("Connecting to server at %s" % statusdb_url)
                    try:
                        couch = couchdb.Server(url="http://%s" % statusdb_url)
                    except Exception:
                        log.warn("Connecting to server at %s failed" % statusdb_url)
                        # Re-raise: continuing would log a bogus "succeeded"
                        # message and then fail on the unbound `couch`.
                        raise
                    log.info("Connecting to server at %s succeeded" % statusdb_url)
                    db = couch['qc']
                    # Save samples
                    for s in qc_obj.sample.keys():
                        obj = qc_obj.sample[s]
                        log.info("Saving sample %s" % obj.name())
                        _save_obj(db, obj, statusdb_url)
                    # Save flowcell object
                    _save_obj(db, qc_obj, statusdb_url)
                except Exception as e:
                    log.warn("Saving QC data to statusdb at %s failed: %s" % (statusdb_url, e))
                    success = False

            if success:
                log.info("QC Metrics report successfully written to statusdb for %s_%s on %s"
                         % (fc_date, fc_name, datetime.now().isoformat()))
            else:
                log.warn("Encountered exception when writing to statusdb for %s_%s on %s"
                         % (fc_date, fc_name, datetime.now().isoformat()))
    except Exception as e:
        success = False
        log.warn("Encountered exception when writing QC metrics to statusdb: %s" % e)

    return success
def create_report_on_gdocs(fc_date, fc_name, run_info_yaml, dirs, config):
    """Create reports on gdocs containing both demultiplexed read counts and QC data.

    The run information is parsed from ``run_info_yaml`` and the Google Docs
    settings are read from the ``gdocs_upload`` section of ``config``.
    ``dirs`` is queried for its ``work`` and ``flowcell`` entries.

    Returns True when the report was written without an exception. Returns
    False when the credentials or the ``gdocs_upload`` section are missing,
    when the setup step fails, or when writing the report raises.
    """
    success = True

    # Inject the fc_date and fc_name in the email subject
    def record_processor(record):
        return record.extra.__setitem__('run', "%s_%s" % (fc_date, fc_name))

    try:
        # Parse the run_info.yaml file
        log.debug("Loading this run_info: {}".format(run_info_yaml))
        with open(run_info_yaml, "r") as fh:
            # NOTE(review): yaml.load can construct arbitrary Python objects;
            # consider yaml.safe_load if this file may be untrusted.
            run_info = yaml.load(fh)

        # Get the gdocs account credentials
        encoded_credentials = get_credentials(config)
        if not encoded_credentials:
            log.warn("Could not find Google Docs account credentials in configuration. "
                     "No sequencing report was written")
            return False

        # Get the required parameters from the post_process.yaml configuration file
        gdocs = config.get("gdocs_upload", None)
        if gdocs is None:
            log.warn("Could not find gdocs_upload section in configuration. "
                     "No sequencing report was written")
            return False

        # Add email notification
        email = gdocs.get("gdocs_email_notification", None)
        smtp_host = config.get("smtp_host", "")
        smtp_port = config.get("smtp_port", "")
        log_handler = create_log_handler({'email': email,
                                          'smtp_host': smtp_host,
                                          'smtp_port': smtp_port}, True)
    except Exception as e:
        log.warn("Encountered exception when writing sequencing report to Google Docs: %s" % e)
        # The names set up above (run_info, log_handler, ...) may be unbound
        # at this point, so the report cannot be attempted.
        return False

    with log_handler.applicationbound(), logbook.Processor(record_processor):
        try:
            log.info("Started creating sequencing report on Google docs for %s_%s on %s"
                     % (fc_date, fc_name, datetime.datetime.now().isoformat()))

            # Get a flowcell object
            fc = Flowcell(fc_name, fc_date, run_info, dirs.get("work", None))

            # Get the GDocs demultiplex result file title
            gdocs_dmplx_spreadsheet = gdocs.get("gdocs_dmplx_file", None)
            # Get the GDocs QC file title
            gdocs_qc_spreadsheet = gdocs.get("gdocs_qc_file", None)

            # FIXME: Make the bc stuff use the Flowcell module
            if gdocs_dmplx_spreadsheet is not None:
                # Upload the data
                bc_metrics.write_run_report_to_gdocs(fc, fc_date, fc_name,
                                                     gdocs_dmplx_spreadsheet,
                                                     encoded_credentials,
                                                     append=True)
            else:
                log.warn("Could not find Google Docs demultiplex results file "
                         "title in configuration. No demultiplex counts were "
                         "written to Google Docs for %s_%s" % (fc_date, fc_name))

            # Parse the QC metrics
            try:
                qc = RTAQCMetrics(dirs.get("flowcell", None))
            except Exception:
                qc = None

            if gdocs_qc_spreadsheet is not None and qc is not None:
                qc_metrics.write_run_report_to_gdocs(fc, qc, gdocs_qc_spreadsheet,
                                                     encoded_credentials)
            else:
                # str.format needs {} placeholders; the previous %s
                # placeholders were emitted literally.
                log.warn("Could not find Google Docs QC file title in configuration. "
                         "No QC data were written to Google Docs "
                         "for {}_{}".format(fc_date, fc_name))

            # Get the projects parent folder
            projects_folder = gdocs.get("gdocs_projects_folder", None)

            # Write the bc project summary report
            if projects_folder is not None:
                create_project_report_on_gdocs(fc, qc, encoded_credentials,
                                               projects_folder)
        except Exception as e:
            success = False
            log.warn("Encountered exception when writing sequencing report "
                     "to Google Docs: {}".format(e))

        if success:
            log.info("Sequencing report successfully created on Google "
                     "docs for {}_{} on {}".format(fc_date, fc_name,
                                                   datetime.datetime.now().isoformat()))
        else:
            log.warn("Encountered exception when writing sequencing "
                     "report for %s_%s to Google docs on %s"
                     % (fc_date, fc_name, datetime.datetime.now().isoformat()))

    return success
def _get_log_handler(self, config):
    """Return the handler produced by ``create_log_handler(config, True)``."""
    return create_log_handler(config, True)
def create_report_on_gdocs(fc_date, fc_name, run_info_yaml, dirs, config):
    """Create reports on gdocs containing both demultiplexed read counts and QC data.

    The run information is parsed from ``run_info_yaml`` and the Google Docs
    settings are read from the ``gdocs_upload`` section of ``config``.
    ``dirs`` is queried for its ``work`` and ``flowcell`` entries.

    Returns True when the report was written without an exception. Returns
    False when the credentials or the ``gdocs_upload`` section are missing,
    when the setup step fails, or when writing the report raises.
    """
    success = True

    # Inject the fc_date and fc_name in the email subject
    def record_processor(record):
        return record.extra.__setitem__('run', "%s_%s" % (fc_date, fc_name))

    try:
        # Parse the run_info.yaml file
        log.debug("Loading this run_info: {}".format(run_info_yaml))
        with open(run_info_yaml, "r") as fh:
            # NOTE(review): yaml.load can construct arbitrary Python objects;
            # consider yaml.safe_load if this file may be untrusted.
            run_info = yaml.load(fh)

        # Get the gdocs account credentials
        encoded_credentials = get_credentials(config)
        if not encoded_credentials:
            log.warn("Could not find Google Docs account credentials in configuration. "
                     "No sequencing report was written")
            return False

        # Get the required parameters from the post_process.yaml configuration file
        gdocs = config.get("gdocs_upload", None)
        if gdocs is None:
            log.warn("Could not find gdocs_upload section in configuration. "
                     "No sequencing report was written")
            return False

        # Add email notification
        email = gdocs.get("gdocs_email_notification", None)
        smtp_host = config.get("smtp_host", "")
        smtp_port = config.get("smtp_port", "")
        log_handler = create_log_handler({'email': email,
                                          'smtp_host': smtp_host,
                                          'smtp_port': smtp_port}, True)
    except Exception as e:
        log.warn("Encountered exception when writing sequencing report to Google Docs: %s" % e)
        # The names set up above (run_info, log_handler, ...) may be unbound
        # at this point, so the report cannot be attempted.
        return False

    with log_handler.applicationbound(), logbook.Processor(record_processor):
        try:
            log.info("Started creating sequencing report on Google docs for %s_%s on %s"
                     % (fc_date, fc_name, datetime.datetime.now().isoformat()))

            # Get a flowcell object
            fc = Flowcell(fc_name, fc_date, run_info, dirs.get("work", None))

            # Get the GDocs demultiplex result file title
            gdocs_dmplx_spreadsheet = gdocs.get("gdocs_dmplx_file", None)
            # Get the GDocs QC file title
            gdocs_qc_spreadsheet = gdocs.get("gdocs_qc_file", None)

            # FIXME: Make the bc stuff use the Flowcell module
            if gdocs_dmplx_spreadsheet is not None:
                # Upload the data
                bc_metrics.write_run_report_to_gdocs(fc, fc_date, fc_name,
                                                     gdocs_dmplx_spreadsheet,
                                                     encoded_credentials,
                                                     append=True)
            else:
                log.warn("Could not find Google Docs demultiplex results file "
                         "title in configuration. No demultiplex counts were "
                         "written to Google Docs for %s_%s" % (fc_date, fc_name))

            # Parse the QC metrics
            try:
                qc = RTAQCMetrics(dirs.get("flowcell", None))
            except Exception:
                qc = None

            if gdocs_qc_spreadsheet is not None and qc is not None:
                qc_metrics.write_run_report_to_gdocs(fc, qc, gdocs_qc_spreadsheet,
                                                     encoded_credentials)
            else:
                # str.format needs {} placeholders; the previous %s
                # placeholders were emitted literally.
                log.warn("Could not find Google Docs QC file title in configuration. "
                         "No QC data were written to Google Docs "
                         "for {}_{}".format(fc_date, fc_name))

            # Get the projects parent folder
            projects_folder = gdocs.get("gdocs_projects_folder", None)

            # Write the bc project summary report
            if projects_folder is not None:
                create_project_report_on_gdocs(fc, qc, encoded_credentials,
                                               projects_folder)
        except Exception as e:
            success = False
            log.warn("Encountered exception when writing sequencing report "
                     "to Google Docs: {}".format(e))

        if success:
            log.info("Sequencing report successfully created on Google "
                     "docs for {}_{} on {}".format(fc_date, fc_name,
                                                   datetime.datetime.now().isoformat()))
        else:
            log.warn("Encountered exception when writing sequencing "
                     "report for %s_%s to Google docs on %s"
                     % (fc_date, fc_name, datetime.datetime.now().isoformat()))

    return success
def long_term_storage(remote_info, config_file):
    """Copy run data described by ``remote_info`` to storage.

    The configuration is parsed from ``config_file`` with ``yaml.load`` and
    ``_copy_for_storage`` is called inside the log handler's
    ``applicationbound()`` context.
    """
    # NOTE(review): yaml.load can construct arbitrary Python objects; if the
    # configuration file may come from an untrusted source, consider
    # yaml.safe_load instead.
    with open(config_file) as config_handle:
        settings = yaml.load(config_handle)
    handler = create_log_handler(settings, log.name)
    with handler.applicationbound():
        _copy_for_storage(remote_info, settings)
def report_to_statusdb(fc_name, fc_date, run_info_yaml, dirs, config):
    """Create statusdb report on a couchdb server.

    A FlowcellQCMetrics object holds information about a flowcell. QC
    results are stored at the flowcell level and sample level depending on
    analysis. Lane level QC data are stored in the FlowcellQCMetrics object.

    The ``statusdb`` section of ``config`` must provide a ``url``; without
    either, nothing is written and False is returned. ``dirs`` is queried
    for its ``work`` and ``flowcell`` entries.

    Returns True when every sample object and the flowcell object were
    passed to ``_save_obj`` without an exception, False otherwise.
    """
    success = True
    try:
        statusdb_config = config.get("statusdb", None)
        if statusdb_config is None:
            log.info("Could not find statusdb section in configuration. No statusdb reporting will be done")
            return False

        statusdb_url = statusdb_config.get("url", None)
        if statusdb_url is None:
            log.warn("No url field found in statusdb configuration section.")
            return False

        # Add email notification
        email = statusdb_config.get("statusdb_email_notification", None)
        smtp_host = config.get("smtp_host", "")
        smtp_port = config.get("smtp_port", "")
        log_handler = create_log_handler({'email': email,
                                          'smtp_host': smtp_host,
                                          'smtp_port': smtp_port}, True)

        def tag_run(record):
            # Store the run identifier in the record's extra data.
            record.extra['run'] = "%s_%s" % (fc_date, fc_name)

        with log_handler.applicationbound(), logbook.Processor(tag_run):
            log.info("Started creating QC Metrics report on statusdb for %s_%s on %s"
                     % (fc_date, fc_name, datetime.now().isoformat()))

            # Create object and parse all available metrics; no checking
            # is currently done for missing files
            try:
                qc_obj = FlowcellQCMetrics(fc_date, fc_name, run_info_yaml,
                                           dirs.get("work", None),
                                           dirs.get("flowcell", None))
            except Exception as e:
                # Report why the object could not be built instead of
                # discarding the error.
                log.warn("Creating FlowcellQCMetrics object failed: %s" % e)
                qc_obj = None

            if qc_obj is None:
                log.warn("Couldn't populate FlowcellQCMetrics object. No QC data written to statusdb for %s_%s"
                         % (fc_date, fc_name))
                success = False
            else:
                try:
                    # Save data at a sample level
                    log.info("Connecting to server at %s" % statusdb_url)
                    try:
                        couch = couchdb.Server(url="http://%s" % statusdb_url)
                    except Exception:
                        log.warn("Connecting to server at %s failed" % statusdb_url)
                        # Re-raise: continuing would log a bogus "succeeded"
                        # message and then fail on the unbound `couch`.
                        raise
                    log.info("Connecting to server at %s succeeded" % statusdb_url)
                    db = couch['qc']
                    # Save samples
                    for s in qc_obj.sample.keys():
                        obj = qc_obj.sample[s]
                        log.info("Saving sample %s" % obj.name())
                        _save_obj(db, obj, statusdb_url)
                    # Save flowcell object
                    _save_obj(db, qc_obj, statusdb_url)
                except Exception as e:
                    log.warn("Saving QC data to statusdb at %s failed: %s" % (statusdb_url, e))
                    success = False

            if success:
                log.info("QC Metrics report successfully written to statusdb for %s_%s on %s"
                         % (fc_date, fc_name, datetime.now().isoformat()))
            else:
                log.warn("Encountered exception when writing to statusdb for %s_%s on %s"
                         % (fc_date, fc_name, datetime.now().isoformat()))
    except Exception as e:
        success = False
        log.warn("Encountered exception when writing QC metrics to statusdb: %s" % e)

    return success
def main(config_file, fc_dir, run_info_yaml=None):
    """Load the configuration and run the pipeline with logging bound.

    ``fc_dir`` is normalized with ``os.path.normpath`` before it is passed
    to ``run_main``; the normalization happens inside the handler's
    ``applicationbound()`` context, as in the call expression it replaces.
    """
    settings = load_config(config_file)
    handler = create_log_handler(settings, log.name)
    with handler.applicationbound():
        flowcell_dir = os.path.normpath(fc_dir)
        run_main(settings, config_file, flowcell_dir, run_info_yaml)