def __call__(self):
    update = {"$set": {STATUS: JOB_STATUS.running,  # @UndefinedVariable
                       START_DATESTAMP: datetime.today()}}
    query = {UUID: self.uuid}
    self.db_connector.update(SA_EXPLORATORY_COLLECTION, query, update)
    try:
        safe_make_dirs(self.tmp_path)
        offline_analysis(self.exp_def_name,
                         self.ac_result_path,
                         self.tmp_tsv_path,
                         ignored_dyes=self.ignored_dyes,
                         data_set_name=self.job_name)
        if not all(os.path.isfile(f) for f in [self.tmp_tsv_path,
                                               self.tmp_scatter_fn,
                                               self.tmp_scatter_ind_fn,
                                               self.tmp_kde_fn,
                                               self.tmp_kde_ind_fn]):
            raise Exception("Secondary analysis exploratory job failed: "
                            "one or more output files not generated.")
        else:
            shutil.copy(self.tmp_tsv_path, self.tsv_path)
            shutil.copy(self.tmp_scatter_fn, self.scatter_fn)
            shutil.copy(self.tmp_scatter_ind_fn, self.scatter_ind_fn)
            shutil.copy(self.tmp_kde_fn, self.kde_fn)
            shutil.copy(self.tmp_kde_ind_fn, self.kde_ind_fn)
    finally:
        # Regardless of success or failure, remove the temporary directory.
        shutil.rmtree(self.tmp_path, ignore_errors=True)
def execute_process(archive_path, dyes, device, major, minor, offsets, use_iid,
                    outfile_path, config_path, uuid):
    '''
    Execute the primary analysis process command. This function copies the
    provided archive to tmp space and executes primary analysis process on
    all PNGs found in the archive.

    @param archive_path - Archive directory path where the TDI images live.
    @param dyes         - Set of dyes used in this run.
    @param device       - Device used to generate the TDI images for this run.
    @param major        - Major dye profile version.
    @param minor        - Minor dye profile version.
    @param offsets      - Range of offsets used to infer a dye model. The
                          inference will offset the dye profiles in this range
                          to determine an optimal offset.
    @param use_iid      - Use IID Peak Detection.
    @param outfile_path - Path where the final analysis.txt file should live.
    @param config_path  - Path where the final configuration file should live.
    @param uuid         - Unique identifier for this job.
    '''
    tmp_path = os.path.join(TMP_PATH, uuid)
    tmp_config_path = os.path.join(tmp_path, "config.txt")
    try:
        # shutil.copytree does not play nicely when copying from samba drive
        # to Mac, so use a system command.
        io_utilities.safe_make_dirs(TMP_PATH)
        os.system("cp -fr %s %s" % (pipes.quote(archive_path),
                                    pipes.quote(tmp_path)))

        with open(tmp_config_path, "w") as f:
            print >> f, "dye_map:"
            print >> f, " device: %s" % device
            if major is not None:
                print >> f, " major: %s" % major
            if minor is not None:
                print >> f, " minor: %s" % minor
            print >> f, " dyes: [%s]" % ", ".join(["\"%s\"" % x for x in dyes])

        images = io_utilities.filter_files(os.listdir(tmp_path),
                                           VALID_HAM_IMAGE_EXTENSIONS)
        images = [os.path.join(tmp_path, image) for image in images]

        # Run primary analysis process
        process(tmp_config_path, images, tmp_path, offsets=offsets,
                use_iid=use_iid)

        # Ensure output file exists
        analysis_output_path = os.path.join(tmp_path, "analysis.txt")
        if not os.path.isfile(analysis_output_path):
            raise Exception("Process job failed: analysis.txt not generated.")
        else:
            shutil.copy(analysis_output_path, outfile_path)
            shutil.copy(tmp_config_path, config_path)
    finally:
        # Regardless of success or failure, remove the copied archive directory
        shutil.rmtree(tmp_path, ignore_errors=True)
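# A minimal usage sketch (not part of the original codebase): how
# execute_process might be invoked for a single job. Every literal below,
# including the archive location, dye names, device identifier, offset range,
# and output paths, is a placeholder assumption rather than a value taken
# from this project.
def _example_execute_process(job_uuid):
    execute_process(archive_path="/path/to/archive_dir",   # hypothetical path
                    dyes=["cy5.5", "pe-cy7"],               # hypothetical dye set
                    device="beta7",                         # hypothetical device name
                    major=1,                                # dye profile version (assumed)
                    minor=0,
                    offsets=range(-30, 31),                 # offset search window (assumed)
                    use_iid=False,
                    outfile_path="/path/to/analysis.txt",   # hypothetical destination
                    config_path="/path/to/config.txt",      # hypothetical destination
                    uuid=job_uuid)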
def save(self):
    try:
        safe_make_dirs(self.tmp_path)
        combine_sa = self._combine_sa(self.tmp_sa_path,
                                      self.id_report_path,
                                      self.png_path,
                                      self.png_sum_path,
                                      self.kde_path,
                                      self.kde_sum_path)
        if not combine_sa:
            raise Exception("Failed to combine secondary analysis results.")

        if not os.path.isfile(self.tmp_sa_path):
            raise Exception("Failed to find temporary combined secondary "
                            "analysis file.")

        self._merge_pdfs(self.tmp_pdf_path, self.vcf_pdf_path, self.tmp_sa_path)
        if not os.path.isfile(self.tmp_pdf_path):
            raise Exception("Failed to merge PDF files.")

        shutil.copy(self.tmp_pdf_path, self.fa_pdf_path)
    finally:
        shutil.rmtree(self.tmp_path, ignore_errors=True)
def start(current_info):
    '''
    Start an instance of the server.
    '''
    io_utilities.safe_make_dirs(os.path.dirname(TORNADO_LOG_FILE_PREFIX))
    tornado.options.options.log_file_prefix = TORNADO_LOG_FILE_PREFIX
    tornado.options.parse_command_line()

    # Delete records of running or submitted jobs
    # Delete TSV outputs of old jobs
    GENERAL_LOGGER.info("Deleting records of unfinished jobs from database. "
                        "Deleting TSV outputs of old jobs.")
    for collection in [PA_PROCESS_COLLECTION,
                       SA_IDENTITY_COLLECTION,
                       SA_ASSAY_CALLER_COLLECTION,
                       SA_GENOTYPER_COLLECTION,
                       SA_EXPLORATORY_COLLECTION,
                       FA_PROCESS_COLLECTION]:
        try:
            # Do not remove VCF/TSV outputs of genotyper and exploratory analysis
            if collection not in [SA_GENOTYPER_COLLECTION,
                                  SA_EXPLORATORY_COLLECTION]:
                io_utilities.delete_tsv(collection)
            io_utilities.delete_unfinished_jobs(collection)
        except:
            GENERAL_LOGGER.exception("Failure deleting records of unfinished "
                                     "jobs or TSVs of old jobs.")

    GENERAL_LOGGER.info("Starting up server on machine %s and port %s at %s." %
                        (current_info[MACHINE],
                         current_info[PORT_HEADER],
                         time.strftime("%I:%M:%S")))

    tr = WSGIContainer(app)
    application = Application([
        (r"/tornado", MainHandler),
        (r".*", FallbackHandler, dict(fallback=tr)),
    ])

    # Max file upload size == MAX_BUFFER_SIZE
    application.listen(PORT, max_buffer_size=MAX_BUFFER_SIZE)

    # Gracefully handle server shutdown; a sketch of such a handler follows
    # this function.
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGQUIT, sig_handler)

    # Add the current info to the running info file.
    write_running_info([current_info])

    IOLoop.instance().start()
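# sig_handler is registered above but not defined in this excerpt. A minimal
# sketch of what such a handler might look like, assuming a graceful shutdown
# simply stops the Tornado IOLoop; the handler name and log message are
# assumptions, not the project's actual implementation.
def _example_sig_handler(sig, frame):
    GENERAL_LOGGER.info("Caught signal %s, stopping server." % sig)
    # add_callback_from_signal safely schedules work on the IOLoop from
    # inside a signal handler.
    IOLoop.instance().add_callback_from_signal(IOLoop.instance().stop)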
def execute_convert_images(archive, outfile_path, uuid):
    '''
    Execute the primary analysis convert_imgs command. This function copies
    the provided archive to tmp space and executes primary analysis
    convert_imgs on all binaries found in the archive.

    @param archive      - Archive directory name where the TDI images live.
    @param outfile_path - File path to final destination of image tar.gz file.
    @param uuid         - Unique identifier for this job.
    '''
    archive_path = os.path.join(ARCHIVES_PATH, archive)
    tmp_path = os.path.join(TMP_PATH, uuid)
    destination = os.path.join(TMP_PATH, uuid, archive)
    destination = os.path.abspath(destination)
    try:
        # shutil.copytree does not play nicely when copying from samba drive
        # to Mac, so use a system command.
        io_utilities.safe_make_dirs(TMP_PATH)
        os.system("cp -fr %s %s" % (archive_path, tmp_path))

        images = io_utilities.filter_files(os.listdir(tmp_path),
                                           extensions=["bin"])
        images = [os.path.join(tmp_path, image) for image in images]

        # Run primary analysis convert_images
        convert_images(images, "png", destination)

        # Ensure images were converted, and if so create archive
        if os.path.exists(destination) and \
           len([x for x in os.listdir(destination) if x.endswith(".png")]) > 0:
            shutil.make_archive(destination, format='gztar',
                                root_dir=os.path.dirname(destination),
                                base_dir=os.path.basename(destination))
        else:
            raise Exception("Convert images job failed: no images converted.")

        # Ensure archive exists
        out_tar_gz = destination + ".tar.gz"
        if os.path.exists(out_tar_gz):
            shutil.copy(out_tar_gz, outfile_path)
        else:
            raise Exception("Convert images job failed: no archive created.")
    finally:
        # Regardless of success or failure, remove the copied archive directory
        shutil.rmtree(tmp_path, ignore_errors=True)
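# A minimal usage sketch (not part of the original codebase): how
# execute_convert_images might be called for one conversion job. The archive
# name and output path are placeholder assumptions.
def _example_execute_convert_images(job_uuid):
    # "example_run_dir" stands in for a directory under ARCHIVES_PATH that
    # contains the .bin TDI images; the resulting tar.gz of PNGs is copied
    # to outfile_path.
    execute_convert_images(archive="example_run_dir",              # hypothetical archive name
                           outfile_path="/path/to/images.tar.gz",  # hypothetical destination
                           uuid=job_uuid)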
def __call__(self):
    update = {"$set": {STATUS: JOB_STATUS.running,  # @UndefinedVariable
                       START_DATESTAMP: datetime.today()}}
    query = {UUID: self.uuid}
    self.db_connector.update(SA_GENOTYPER_COLLECTION, query, update)
    try:
        safe_make_dirs(self.tmp_path)
        exp_def_fetcher = ExpDefHandler()
        experiment = exp_def_fetcher.get_experiment_definition(self.exp_def_name)
        GenotypeProcessor(experiment,
                          None,
                          self.tmp_outfile_path,
                          required_drops=self.required_drops,
                          in_file=self.ac_result_path,
                          ignored_dyes=self.ignored_dyes,
                          mask_code=self.mask_code,
                          combine_alleles=self.combine_alleles)

        if not os.path.isfile(self.tmp_outfile_path):
            raise Exception("Secondary analysis genotyper job failed: "
                            "VCF file not generated.")
        else:
            shutil.copy(self.tmp_outfile_path, self.outfile_path)

        if not os.path.isfile(self.tmp_outfile_path[:-3] + PDF):
            raise Exception("Secondary analysis genotyper job failed: "
                            "PDF file not generated.")
        else:
            shutil.copy(self.tmp_outfile_path[:-3] + PDF,
                        self.outfile_path[:-3] + PDF)
    finally:
        # Regardless of success or failure, remove the temporary directory.
        shutil.rmtree(self.tmp_path, ignore_errors=True)
def __call__(self):
    update = {'$set': {STATUS: JOB_STATUS.running,  # @UndefinedVariable
                       START_DATESTAMP: datetime.today()}}
    query = {UUID: self.uuid}
    self.db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)

    def gen_dye_scatterplot(dyes, sys_listener_path):
        try:
            analysis_df = pandas.read_table(self.analysis_file,
                                            sep=sniff_delimiter(self.analysis_file))
            ac_df = pandas.read_table(self.tmp_outfile_path,
                                      sep=sniff_delimiter(self.tmp_outfile_path))
            analysis_df['assay'] = False
            analysis_df.loc[analysis_df['identity'].notnull(), 'assay'] = \
                ac_df['assay'].values

            # System listener inputs
            dyn_align_offsets = {}
            temps = {}
            steps = {}
            if sys_listener_path is not None:
                sys_listener_dir = os.path.dirname(sys_listener_path)
                clamp_temp_tp = ClampTempTopicParser()
                old_channel_offset_tp = OldChannelOffsetTopicParser()
                channel_offset_tp = ChannelOffsetTopicParser()
                dyn_align_steps_tp = DynamicAlignStepsParser()
                topic_parsers = [clamp_temp_tp, old_channel_offset_tp,
                                 channel_offset_tp, dyn_align_steps_tp]
                sys_listener_parser = SystemListenerParser(
                    sys_listener_dir, topic_parsers=topic_parsers)
                temps = sys_listener_parser.get_topic_results(
                    clamp_temp_tp.topic)
                dyn_align_offsets = sys_listener_parser.get_topic_results(
                    channel_offset_tp.topic)
                if len(dyn_align_offsets) < 1:
                    APP_LOGGER.info("Using old channel offset parser...")
                    dyn_align_offsets = sys_listener_parser.get_topic_results(
                        old_channel_offset_tp.topic)
                else:
                    APP_LOGGER.info("Using new channel offset parser...")
                steps = sys_listener_parser.get_topic_results(
                    dyn_align_steps_tp.topic)

            generate_dye_scatterplots(analysis_df,
                                      dyes,
                                      self.tmp_dyes_plot_path,
                                      self.job_name,
                                      self.pico1_dye,
                                      dyn_align_offsets=dyn_align_offsets,
                                      temps=temps,
                                      steps=steps)
            shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
            APP_LOGGER.info("Dyes scatter plot generated for %s." %
                            self.job_name)
        except:
            APP_LOGGER.exception("Dyes scatter plot generation failed.")

    try:
        safe_make_dirs(self.tmp_path)
        exp_def_fetcher = ExpDefHandler()
        experiment = exp_def_fetcher.get_experiment_definition(self.exp_def_name)

        model_file_dict = available_models(self.ac_method)
        if self.ac_model is None:
            classifier_file = None
        elif self.ac_model in model_file_dict:
            classifier_file = model_file_dict[self.ac_model]
        else:
            msg = "Assay caller model %s is unavailable for method %s." % \
                  (self.ac_model, self.ac_method)
            APP_LOGGER.error(msg)
            raise Exception(msg)

        AssayCallManager(self.num_probes,
                         in_file=self.analysis_file,
                         out_file=self.tmp_outfile_path,
                         scatter_plot_file=self.tmp_scatter_plot_path,
                         training_factor=self.training_factor,
                         assay=self.assay_dye,
                         fiducial=self.pico2_dye,
                         controls=experiment.negative_controls.barcodes,
                         ctrl_thresh=self.ctrl_thresh,
                         n_jobs=8,
                         controls_filtering=self.ctrl_filter,
                         assay_type=self.ac_method,
                         classifier_file=classifier_file)

        if not os.path.isfile(self.tmp_outfile_path):
            raise Exception("Secondary analysis assay caller job failed: "
                            "output file not generated.")

        shutil.copy(self.tmp_outfile_path, self.outfile_path)
        gen_dye_scatterplot(experiment.dyes, self.get_sys_listener_path())

        if os.path.isfile(self.tmp_scatter_plot_path):
            shutil.copy(self.tmp_scatter_plot_path, self.scatter_plot_path)
    finally:
        # Regardless of success or failure, remove the temporary directory.
        shutil.rmtree(self.tmp_path, ignore_errors=True)
def __call__(self):
    # retrieve primary analysis data
    primary_analysis_doc = self.db_connector.find(
        PA_PROCESS_COLLECTION,
        criteria={UUID: self.primary_analysis_uuid},
        projection={ID: 0, RESULT: 1, UUID: 1, DYES: 1})[0]

    # verify barcode dyes
    primary_analysis_dyes = set(primary_analysis_doc[DYES])
    identity_dyes = set([x[0] for x in self.dye_levels])
    if not identity_dyes.issubset(primary_analysis_dyes):
        raise Exception("Dyes in levels: %s must be a subset of run dyes: %s" %
                        (identity_dyes, primary_analysis_dyes))

    # verify primary analysis file exists
    if not os.path.isfile(primary_analysis_doc[RESULT]):
        raise InvalidFileError(primary_analysis_doc[RESULT])

    # update database to indicate job is running
    update = {"$set": {STATUS: JOB_STATUS.running,
                       START_DATESTAMP: datetime.today()}}
    self.db_connector.update(SA_IDENTITY_COLLECTION, {UUID: self.uuid}, update)

    try:
        # For full analysis the user may want to turn off picoinjection
        # filtering even if there is a pico1 dye. If use_pico1_filter is
        # False, set pico1_dye to None.
        if not self.use_pico1_filter:
            self.pico1_dye = None
        if not self.use_pico2_filter:
            self.pico2_dye = None

        safe_make_dirs(self.tmp_path)

        plate_base_path = os.path.join(self.tmp_path, 'tmp_plot')
        OfflineIdentity(in_path=primary_analysis_doc[RESULT],
                        num_probes=self.num_probes,
                        factory_type=FACTORY_ORGANIC,
                        plot_base_path=plate_base_path,
                        out_file=self.tmp_outfile_path,
                        report_path=self.tmp_report_path,
                        assay_dye=self.assay_dye,
                        pico1_dye=self.pico1_dye,
                        pico2_dye=self.pico2_dye,
                        dye_levels=self.dye_levels,
                        show_figure=False,
                        ignored_dyes=self.ignored_dyes,
                        filtered_dyes=self.filtered_dyes,
                        uninjected_threshold=self.ui_threshold,
                        dev_mode=self.dev_mode,
                        use_pico_thresh=self.use_pico_thresh,
                        max_uninj_ratio=self.max_uninj_ratio,
                        ignore_lowest_barcode=self.ignore_lowest_barcode,
                        drift_compensate=self.drift_compensate).execute()

        if not os.path.isfile(self.tmp_outfile_path):
            raise Exception("Secondary analysis identity job failed: "
                            "identity output file not generated.")
        else:
            shutil.copy(self.tmp_outfile_path, self.outfile_path)

        tmp_plot_path = plate_base_path + ID_PLOT_SUFFIX
        tmp_plate_plot_path = plate_base_path + ID_PLATES_PLOT_SUFFIX
        tmp_temporal_plot_path = plate_base_path + ID_TEMPORAL_PLOT_SUFFIX
        tmp_drop_count_plot_path = plate_base_path + ID_DROP_COUNT_PLOT_SUFFIX

        if os.path.isfile(tmp_plot_path):
            shutil.copy(tmp_plot_path, self.plot_path)

        if os.path.isfile(tmp_plate_plot_path):
            shutil.copy(tmp_plate_plot_path, self.plate_plot_path)

        if os.path.isfile(tmp_temporal_plot_path):
            shutil.copy(tmp_temporal_plot_path, self.temporal_plot_path)

        if os.path.isfile(tmp_drop_count_plot_path):
            shutil.copy(tmp_drop_count_plot_path, self.drop_count_plot_path)

        if os.path.isfile(self.tmp_report_path):
            shutil.copy(self.tmp_report_path, self.report_path)
    finally:
        # Regardless of success or failure, remove the temporary directory.
        shutil.rmtree(self.tmp_path, ignore_errors=True)
        restart_server = args.restart
        start_server = args.start
        stop_server = args.stop
        show_status = args.status
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception, e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + " for help use --help")
        return 2

    io_utilities.safe_make_dirs(HOME_DIR)
    io_utilities.safe_make_dirs(TARGETS_UPLOAD_PATH)
    io_utilities.safe_make_dirs(PROBES_UPLOAD_PATH)
    io_utilities.safe_make_dirs(PLATES_UPLOAD_PATH)
    io_utilities.safe_make_dirs(RESULTS_PATH)
    io_utilities.safe_make_dirs(REFS_PATH)

    # Clean up tmp dir.
    shutil.rmtree(TMP_PATH, ignore_errors=True)
    io_utilities.safe_make_dirs(TMP_PATH)

    # Update database with latest information
    update_archives()
    update_devices()
    update_dyes()
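# The command-line parser that produces `args` is not shown in this excerpt.
# A minimal sketch of what it might look like, assuming only the four boolean
# flags referenced above; the option names and help strings are assumptions,
# not the project's actual interface.
def _example_build_parser(program_name):
    import argparse
    parser = argparse.ArgumentParser(prog=program_name,
                                     description="Control the analysis server.")
    parser.add_argument("--restart", action="store_true",
                        help="Restart the server.")
    parser.add_argument("--start", action="store_true",
                        help="Start the server.")
    parser.add_argument("--stop", action="store_true",
                        help="Stop the server.")
    parser.add_argument("--status", action="store_true",
                        help="Show server status.")
    return parser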