Example #1
    def __call__(self):
        update = {"$set": {STATUS: JOB_STATUS.running,      # @UndefinedVariable
                           START_DATESTAMP: datetime.today()}}
        query = {UUID: self.uuid}
        self.db_connector.update(SA_EXPLORATORY_COLLECTION, query, update)
        try:
            safe_make_dirs(self.tmp_path)

            offline_analysis(self.exp_def_name,
                             self.ac_result_path,
                             self.tmp_tsv_path,
                             ignored_dyes=self.ignored_dyes,
                             data_set_name=self.job_name)

            if not all(os.path.isfile(f) for f in
                       [self.tmp_tsv_path, self.tmp_scatter_fn,
                        self.tmp_scatter_ind_fn, self.tmp_kde_fn,
                        self.tmp_kde_ind_fn]):
                raise Exception("Secondary analysis exploratory job " +
                                "failed: one or more output file(s) not generated.")
            else:
                shutil.copy(self.tmp_tsv_path, self.tsv_path)
                shutil.copy(self.tmp_scatter_fn, self.scatter_fn)
                shutil.copy(self.tmp_scatter_ind_fn, self.scatter_ind_fn)
                shutil.copy(self.tmp_kde_fn, self.kde_fn)
                shutil.copy(self.tmp_kde_ind_fn, self.kde_ind_fn)
        finally:
            # Regardless of success or failure, remove the copied archive directory
            shutil.rmtree(self.tmp_path, ignore_errors=True)
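Every job in these snippets follows the same lifecycle: create a temporary workspace with safe_make_dirs, write outputs there, copy the verified results to their final destination, and remove the workspace in a finally block. As a hedged sketch (the tmp_workspace helper below is hypothetical, not part of the source), the pattern could be factored into a context manager:

import contextlib
import os
import shutil

@contextlib.contextmanager
def tmp_workspace(tmp_path):
    # Create the scratch directory, hand it to the caller, and always
    # remove it afterwards -- mirroring the try/finally in each job above.
    if not os.path.isdir(tmp_path):
        os.makedirs(tmp_path)
    try:
        yield tmp_path
    finally:
        shutil.rmtree(tmp_path, ignore_errors=True)

# Hypothetical usage:
# with tmp_workspace("/tmp/some-job-uuid") as tmp:
#     ...write outputs under tmp, then shutil.copy them to their final paths...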
Example #2
def execute_process(archive_path, dyes, device, major, minor, offsets, use_iid,
                    outfile_path, config_path, uuid):
    '''
    Execute the primary analysis process command. This function copies the
    provided archive to tmp space and executes primary analysis process on
    all PNGs found in the archive.

    @param archive_path - Archive directory path where the TDI images live.
    @param dyes         - Set of dyes used in this run.
    @param device       - Device used to generate the TDI images for this run.
    @param major        - Major dye profile version.
    @param minor        - Minor dye profile version.
    @param offsets      - Range of offsets used to infer a dye model. The
                          inference will offset the dye profiles in this range
                          to determine an optimal offset.
    @param use_iid      - Use IID Peak Detection.
    @param outfile_path - Path where the final analysis.txt file should live.
    @param config_path  - Path where the final configuration file should live.
    @param uuid         - Unique identifier for this job.
    '''
    tmp_path = os.path.join(TMP_PATH, uuid)
    tmp_config_path = os.path.join(tmp_path, "config.txt")
    try:
        # shutil.copytree does not play nicely when copying from samba drive to
        # Mac, so use a system command.
        io_utilities.safe_make_dirs(TMP_PATH)
        os.system("cp -fr %s %s" %
                  (pipes.quote(archive_path), pipes.quote(tmp_path)))

        with open(tmp_config_path, "w") as f:
            print >> f, "dye_map:"
            print >> f, "  device: %s" % device
            if major is not None:
                print >> f, "  major: %s" % major
            if minor is not None:
                print >> f, "  minor: %s" % minor
            print >> f, "  dyes: [%s]" % ", ".join(
                ["\"%s\"" % x for x in dyes])

        images = io_utilities.filter_files(os.listdir(tmp_path),
                                           VALID_HAM_IMAGE_EXTENSIONS)
        images = [os.path.join(tmp_path, image) for image in images]

        # Run primary analysis process
        process(tmp_config_path,
                images,
                tmp_path,
                offsets=offsets,
                use_iid=use_iid)

        # Ensure output file exists
        analysis_output_path = os.path.join(tmp_path, "analysis.txt")
        if not os.path.isfile(analysis_output_path):
            raise Exception("Process job failed: analysis.txt not generated.")
        else:
            shutil.copy(analysis_output_path, outfile_path)
            shutil.copy(tmp_config_path, config_path)
    finally:
        # Regardless of success or failure, remove the copied archive directory
        shutil.rmtree(tmp_path, ignore_errors=True)
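A hedged usage sketch; every argument value below is hypothetical, and in the real service they come from the submitted job document:

execute_process("/archives/run_001",             # archive_path
                dyes=["cy5", "pe"],              # hypothetical dye names
                device="beta7",                  # hypothetical device id
                major=1,
                minor=0,
                offsets=range(-30, 30),
                use_iid=False,
                outfile_path="/results/analysis.txt",
                config_path="/results/config.txt",
                uuid="some-job-uuid")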
Example #3
    def save(self):
        try:
            safe_make_dirs(self.tmp_path)

            combine_sa = self._combine_sa(self.tmp_sa_path,
                                          self.id_report_path, self.png_path,
                                          self.png_sum_path, self.kde_path,
                                          self.kde_sum_path)
            if not combine_sa:
                raise Exception(
                    "Failed to combine secondary analysis results.")

            if not os.path.isfile(self.tmp_sa_path):
                raise Exception(
                    "Failed to find temporary combined secondary analysis file"
                )

            self._merge_pdfs(self.tmp_pdf_path, self.vcf_pdf_path,
                             self.tmp_sa_path)

            if not os.path.isfile(self.tmp_pdf_path):
                raise Exception("Failed to merge PDF files.")

            shutil.copy(self.tmp_pdf_path, self.fa_pdf_path)
        finally:
            shutil.rmtree(self.tmp_path, ignore_errors=True)
Example #4
def start(current_info):
    '''
    Start an instance of the server.
    '''
    io_utilities.safe_make_dirs(os.path.dirname(TORNADO_LOG_FILE_PREFIX))
    tornado.options.options.log_file_prefix = TORNADO_LOG_FILE_PREFIX
    tornado.options.parse_command_line()

    # Delete running or submitted jobs
    # Delete TSV outputs of old jobs
    GENERAL_LOGGER.info(
        "Deleting records of unfinished jobs from databse. Deleting TSV outputs of old jobs."
    )
    for collection in [
            PA_PROCESS_COLLECTION, SA_IDENTITY_COLLECTION,
            SA_ASSAY_CALLER_COLLECTION, SA_GENOTYPER_COLLECTION,
            SA_EXPLORATORY_COLLECTION, FA_PROCESS_COLLECTION
    ]:
        try:
            # Do not remove VCF/TSV outputs of genotyper and exploratory analysis
            if collection not in [
                    SA_GENOTYPER_COLLECTION, SA_EXPLORATORY_COLLECTION
            ]:
                io_utilities.delete_tsv(collection)
            io_utilities.delete_unfinished_jobs(collection)
        except Exception:
            GENERAL_LOGGER.exception(
                "Failure deleting records of unfinished jobs or TSVs of old jobs."
            )

    GENERAL_LOGGER.info("Starting up server on machine %s and port %s at %s." %
                        (current_info[MACHINE], current_info[PORT_HEADER],
                         time.strftime("%I:%M:%S")))

    tr = WSGIContainer(app)
    application = Application([
        (r"/tornado", MainHandler),
        (r".*", FallbackHandler, dict(fallback=tr)),
    ])

    # Max file upload size == MAX_BUFFER_SIZE
    application.listen(PORT, max_buffer_size=MAX_BUFFER_SIZE)

    # Gracefully handle server shutdown.
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGQUIT, sig_handler)

    # Add the current info to the running info file.
    write_running_info([current_info])

    IOLoop.instance().start()
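sig_handler is registered above but not shown in this snippet. A minimal, hedged version that defers shutdown to the IOLoop (Tornado's add_callback_from_signal is the signal-safe way to schedule work from a signal handler) might look like:

def sig_handler(signum, frame):
    # Runs in signal context; do no real work here, just ask the IOLoop
    # to stop itself from within its own thread.
    GENERAL_LOGGER.info("Caught signal %s, shutting down server." % signum)
    IOLoop.instance().add_callback_from_signal(IOLoop.instance().stop)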
Example #5
def execute_convert_images(archive, outfile_path, uuid):
    '''
    Execute the primary analysis convert_imgs command. This function copies the
    provided archive to tmp space and executes primary analysis convert_imgs on
    all binaries found in the archive.

    @param archive      - Archive directory name where the TDI images live.
    @param outfile_path - File path to final destination of image tar.gz file.
    @param uuid         - Unique identifier for this job.
    '''
    archive_path = os.path.join(ARCHIVES_PATH, archive)
    tmp_path = os.path.join(TMP_PATH, uuid)
    destination = os.path.join(TMP_PATH, uuid, archive)
    destination = os.path.abspath(destination)
    try:
        # shutil.copytree does not play nicely when copying from samba drive to
        # Mac, so use a system command.
        io_utilities.safe_make_dirs(TMP_PATH)
        os.system("cp -fr %s %s" % (archive_path, tmp_path))

        images = io_utilities.filter_files(os.listdir(tmp_path),
                                           extensions=["bin"])
        images = [os.path.join(tmp_path, image) for image in images]

        # Run primary analysis convert_images
        convert_images(images, "png", destination)

        # Ensure images were converted, and if so create archive
        if os.path.exists(destination) and \
           len([x for x in os.listdir(destination) if x.endswith(".png")]) > 0:
            shutil.make_archive(destination,
                                format='gztar',
                                root_dir=os.path.dirname(destination),
                                base_dir=os.path.basename(destination))
        else:
            raise Exception("Convert images job failed: no images converted.")

        # Ensure archive exists
        out_tar_gz = destination + ".tar.gz"
        if os.path.exists(out_tar_gz):
            shutil.copy(out_tar_gz, outfile_path)
        else:
            raise Exception("Convert images job failed: no archive created.")
    finally:
        # Regardless of success or failure, remove the copied archive directory
        shutil.rmtree(tmp_path, ignore_errors=True)
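A hedged usage sketch (hypothetical values; archive is a directory name under ARCHIVES_PATH, not a full path):

execute_convert_images("run_20160101",
                       outfile_path="/results/run_20160101.tar.gz",
                       uuid="some-job-uuid")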
Example #6
    def __call__(self):
        update = {
            "$set": {
                STATUS: JOB_STATUS.running,  # @UndefinedVariable
                START_DATESTAMP: datetime.today()
            }
        }
        query = {UUID: self.uuid}
        self.db_connector.update(SA_GENOTYPER_COLLECTION, query, update)
        try:
            safe_make_dirs(self.tmp_path)

            exp_def_fetcher = ExpDefHandler()
            experiment = exp_def_fetcher.get_experiment_definition(
                self.exp_def_name)
            GenotypeProcessor(experiment,
                              None,
                              self.tmp_outfile_path,
                              required_drops=self.required_drops,
                              in_file=self.ac_result_path,
                              ignored_dyes=self.ignored_dyes,
                              mask_code=self.mask_code,
                              combine_alleles=self.combine_alleles)

            if not os.path.isfile(self.tmp_outfile_path):
                raise Exception("Secondary analysis genotyper job " +
                                "failed: VCF file not generated.")
            else:
                shutil.copy(self.tmp_outfile_path, self.outfile_path)

            if not os.path.isfile(self.tmp_outfile_path[:-3] + PDF):
                raise Exception("Secondary analysis genotyper job " +
                                "failed: PDF file not generated.")
            else:
                shutil.copy(self.tmp_outfile_path[:-3] + PDF,
                            self.outfile_path[:-3] + PDF)
        finally:
            # Regardless of success or failure, remove the copied archive directory
            shutil.rmtree(self.tmp_path, ignore_errors=True)
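The [:-3] slices above assume the output path ends in a three-character extension (e.g. "vcf"), with PDF supplying the replacement. A splitext-based helper (hypothetical, not in the source) removes that assumption:

import os

def swap_extension(path, new_ext):
    # "out.vcf" -> "out.pdf" for any extension length, unlike path[:-3]
    base, _old = os.path.splitext(path)
    return base + new_ext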
Example #7
    def __call__(self):
        update = {
            '$set': {
                STATUS: JOB_STATUS.running,  # @UndefinedVariable
                START_DATESTAMP: datetime.today()
            }
        }
        query = {UUID: self.uuid}
        self.db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)

        def gen_dye_scatterplot(dyes, sys_listener_path):
            try:
                analysis_df = pandas.read_table(self.analysis_file,
                                                sep=sniff_delimiter(
                                                    self.analysis_file))
                ac_df = pandas.read_table(self.tmp_outfile_path,
                                          sep=sniff_delimiter(
                                              self.tmp_outfile_path))
                analysis_df['assay'] = False
                analysis_df.loc[analysis_df['identity'].notnull(),
                                'assay'] = ac_df['assay'].values

                # System listener inputs
                dyn_align_offsets = {}
                temps = {}
                steps = {}
                if sys_listener_path is not None:
                    sys_listener_dir = os.path.dirname(sys_listener_path)
                    clamp_temp_tp = ClampTempTopicParser()
                    old_channel_offset_tp = OldChannelOffsetTopicParser()
                    channel_offset_tp = ChannelOffsetTopicParser()
                    dyn_align_steps_tp = DynamicAlignStepsParser()
                    topic_parsers = [
                        clamp_temp_tp, old_channel_offset_tp,
                        channel_offset_tp, dyn_align_steps_tp
                    ]
                    sys_listener_parser = SystemListenerParser(
                        sys_listener_dir, topic_parsers=topic_parsers)
                    temps = sys_listener_parser.get_topic_results(
                        clamp_temp_tp.topic)
                    dyn_align_offsets = sys_listener_parser.get_topic_results(
                        channel_offset_tp.topic)
                    if len(dyn_align_offsets) < 1:
                        APP_LOGGER.info("Using old channel offset parser...")
                        dyn_align_offsets = sys_listener_parser.get_topic_results(
                            old_channel_offset_tp.topic)
                    else:
                        APP_LOGGER.info("Using new channel offset parser...")
                    steps = sys_listener_parser.get_topic_results(
                        dyn_align_steps_tp.topic)

                generate_dye_scatterplots(analysis_df,
                                          dyes,
                                          self.tmp_dyes_plot_path,
                                          self.job_name,
                                          self.pico1_dye,
                                          dyn_align_offsets=dyn_align_offsets,
                                          temps=temps,
                                          steps=steps)
                shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
                APP_LOGGER.info("Dyes scatter plot generated for %s." % \
                    self.job_name)
            except Exception:
                APP_LOGGER.exception("Dyes scatter plot generation failed.")

        try:
            safe_make_dirs(self.tmp_path)

            exp_def_fetcher = ExpDefHandler()
            experiment = exp_def_fetcher.get_experiment_definition(
                self.exp_def_name)

            model_file_dict = available_models(self.ac_method)
            if self.ac_model is None:
                classifier_file = None
            else:
                if self.ac_model in model_file_dict:
                    classifier_file = model_file_dict[self.ac_model]
                else:
                    APP_LOGGER.error(
                        "Assay caller model %s is unavailable for method %s." %
                        (self.ac_model, self.ac_method))
                    raise Exception(
                        "Assay caller model %s is unavailable for method %s." %
                        (self.ac_model, self.ac_method))

            AssayCallManager(self.num_probes,
                             in_file=self.analysis_file,
                             out_file=self.tmp_outfile_path,
                             scatter_plot_file=self.tmp_scatter_plot_path,
                             training_factor=self.training_factor,
                             assay=self.assay_dye,
                             fiducial=self.pico2_dye,
                             controls=experiment.negative_controls.barcodes,
                             ctrl_thresh=self.ctrl_thresh,
                             n_jobs=8,
                             controls_filtering=self.ctrl_filter,
                             assay_type=self.ac_method,
                             classifier_file=classifier_file)

            if not os.path.isfile(self.tmp_outfile_path):
                raise Exception('Secondary analysis assay caller job ' +
                                'failed: output file not generated.')

            shutil.copy(self.tmp_outfile_path, self.outfile_path)
            gen_dye_scatterplot(experiment.dyes, self.get_sys_listener_path())

            if os.path.isfile(self.tmp_scatter_plot_path):
                shutil.copy(self.tmp_scatter_plot_path, self.scatter_plot_path)
        finally:
            # Regardless of success or failure, remove the copied archive directory
            shutil.rmtree(self.tmp_path, ignore_errors=True)
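sniff_delimiter is not defined in this snippet; a hedged sketch built on the standard library's csv.Sniffer, defaulting to tab for these analysis files, could be:

import csv

def sniff_delimiter(path, default="\t"):
    # Guess the field delimiter from the first line, falling back to tab.
    with open(path) as fh:
        sample = fh.readline()
    try:
        return csv.Sniffer().sniff(sample, delimiters="\t,;").delimiter
    except csv.Error:
        return default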
Example #8
    def __call__(self):
        # retrieve primary analysis data
        primary_analysis_doc = self.db_connector.find(
            PA_PROCESS_COLLECTION,
            criteria={UUID: self.primary_analysis_uuid},
            projection={ID: 0, RESULT: 1, UUID: 1, DYES: 1})[0]

        # verify barcode dyes
        primary_analysis_dyes = set(primary_analysis_doc[DYES])
        identity_dyes = set(x[0] for x in self.dye_levels)
        if not identity_dyes.issubset(primary_analysis_dyes):
            raise Exception("Dyes in levels: %s must be a subset of run dyes: %s" %
                            (identity_dyes, primary_analysis_dyes))

        # verify primary analysis file exists
        if not os.path.isfile(primary_analysis_doc[RESULT]):
            raise InvalidFileError(primary_analysis_doc[RESULT])

        # update database to indicate job is running
        update = {"$set": {STATUS: JOB_STATUS.running,
                           START_DATESTAMP: datetime.today()}}
        self.db_connector.update(SA_IDENTITY_COLLECTION, {UUID: self.uuid}, update)

        try:
            # for full analysis the user may want to turn off picoinjection filtering
            # even if there is a pico1 dye.  If use_pico1_filter is False, set pico1_dye to None
            if not self.use_pico1_filter:
                self.pico1_dye = None
            if not self.use_pico2_filter:
                self.pico2_dye = None
            safe_make_dirs(self.tmp_path)
            plate_base_path = os.path.join(self.tmp_path, 'tmp_plot')
            OfflineIdentity(in_path=primary_analysis_doc[RESULT],
                            num_probes=self.num_probes,
                            factory_type=FACTORY_ORGANIC,
                            plot_base_path=plate_base_path,
                            out_file=self.tmp_outfile_path,
                            report_path=self.tmp_report_path,
                            assay_dye=self.assay_dye,
                            pico1_dye=self.pico1_dye,
                            pico2_dye=self.pico2_dye,
                            dye_levels=self.dye_levels,
                            show_figure=False,
                            ignored_dyes=self.ignored_dyes,
                            filtered_dyes=self.filtered_dyes,
                            uninjected_threshold=self.ui_threshold,
                            dev_mode=self.dev_mode,
                            use_pico_thresh=self.use_pico_thresh,
                            max_uninj_ratio=self.max_uninj_ratio,
                            ignore_lowest_barcode=self.ignore_lowest_barcode,
                            drift_compensate=self.drift_compensate).execute()
            if not os.path.isfile(self.tmp_outfile_path):
                raise Exception("Secondary analysis identity job failed: identity output file not generated.")
            else:
                shutil.copy(self.tmp_outfile_path, self.outfile_path)
            tmp_plot_path = plate_base_path + ID_PLOT_SUFFIX
            tmp_plate_plot_path = plate_base_path + ID_PLATES_PLOT_SUFFIX
            tmp_temporal_plot_path = plate_base_path + ID_TEMPORAL_PLOT_SUFFIX
            tmp_drop_count_plot_path = plate_base_path + ID_DROP_COUNT_PLOT_SUFFIX
            if os.path.isfile(tmp_plot_path):
                shutil.copy(tmp_plot_path, self.plot_path)
            if os.path.isfile(tmp_plate_plot_path):
                shutil.copy(tmp_plate_plot_path, self.plate_plot_path)
            if os.path.isfile(tmp_temporal_plot_path):
                shutil.copy(tmp_temporal_plot_path, self.temporal_plot_path)
            if os.path.isfile(tmp_drop_count_plot_path):
                shutil.copy(tmp_drop_count_plot_path, self.drop_count_plot_path)
            if os.path.isfile(self.tmp_report_path):
                shutil.copy(self.tmp_report_path, self.report_path)
        finally:
            # Regardless of success or failure, remove the copied archive directory
            shutil.rmtree(self.tmp_path, ignore_errors=True)
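The "$set" document implies db_connector wraps a MongoDB collection. A hedged pymongo-style equivalent of the status update above (plain strings stand in for the module-level UUID/STATUS/START_DATESTAMP constants):

from datetime import datetime

def mark_job_running(collection, job_uuid):
    # pymongo 3.x rendering of db_connector.update(collection, query, update)
    collection.update_one({"uuid": job_uuid},
                          {"$set": {"status": "running",
                                    "start_datestamp": datetime.today()}})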
Example #9
        restart_server = args.restart
        start_server = args.start
        stop_server = args.stop
        show_status = args.status

    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception as e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help\n")
        return 2

    io_utilities.safe_make_dirs(HOME_DIR)
    io_utilities.safe_make_dirs(TARGETS_UPLOAD_PATH)
    io_utilities.safe_make_dirs(PROBES_UPLOAD_PATH)
    io_utilities.safe_make_dirs(PLATES_UPLOAD_PATH)
    io_utilities.safe_make_dirs(RESULTS_PATH)
    io_utilities.safe_make_dirs(REFS_PATH)

    # Clean up tmp dir.
    shutil.rmtree(TMP_PATH, ignore_errors=True)
    io_utilities.safe_make_dirs(TMP_PATH)

    # Update database with latest information
    update_archives()
    update_devices()
    update_dyes()
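safe_make_dirs appears in every example; a hedged sketch of what such a helper typically does (create the directory tree, tolerating the already-exists case while surfacing real errors):

import errno
import os

def safe_make_dirs(path):
    # makedirs that is a no-op if the directory already exists.
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise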