Example #1
 def absorption_callback(future):
     try:
         _ = future.result()
         update = {
             "$set": {
                 STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                 RESULT: outfile_path,
                 FINISH_DATESTAMP: datetime.today(),
                 URL: "http://%s/results/%s/%s" % (HOSTNAME, PORT, uuid)
             }
         }
         # If job has been deleted, then delete result and don't update DB.
         if len(db_connector.find(ABSORPTION_COLLECTION, query, {})) > 0:
             db_connector.update(ABSORPTION_COLLECTION, query, update)
         elif os.path.isfile(outfile_path):
             os.remove(outfile_path)
     except:
         APP_LOGGER.exception(traceback.format_exc())
         error_msg = str(sys.exc_info()[1])
         update = {
             "$set": {
                 STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                 RESULT: None,
                 FINISH_DATESTAMP: datetime.today(),
                 ERROR: error_msg
             }
         }
         # If job has been deleted, then delete result and don't update DB.
         if len(db_connector.find(ABSORPTION_COLLECTION, query, {})) > 0:
             db_connector.update(ABSORPTION_COLLECTION, query, update)
         elif os.path.isfile(outfile_path):
             os.remove(outfile_path)
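Example #1's callback is a closure: uuid, outfile_path, query, and db_connector are bound in an enclosing scope, because the executor hands the callback nothing but the future. Example #13 below obtains such a callback from a factory, make_absorption_callback; a minimal sketch of that factory, assuming the same module-level constants and connector API used throughout these examples:

 def make_absorption_callback(uuid, outfile_path, db_connector):
     # Bind the job-specific state here; the executor passes only the future.
     query = {UUID: uuid}

     def absorption_callback(future):
         pass  # body as in Example #1, using the closed-over variables

     return absorption_callback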
Example #2
    def process_request(cls, params_dict):
        dataset = params_dict[cls.dataset_parameter][0]
        report_uuid = params_dict[cls.report_uuid_parameter][0]

        http_status_code = 200
        json_response = {RUN_REPORT_UUID: report_uuid, HDF5_DATASET: dataset}

        try:
            cls._DB_CONNECTOR.update(
                RUN_REPORT_COLLECTION, {UUID: report_uuid},
                {'$pull': {
                    IMAGE_STACKS: {
                        'name': dataset,
                        'upload': True
                    }
                }})
            cls._DB_CONNECTOR.remove(HDF5_COLLECTION, {HDF5_DATASET: dataset})
            json_response.update({"unassociate": True})
            APP_LOGGER.info("Removed dataset name=%s from run report uuid=%s" %
                            (dataset, report_uuid))
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500

        return json_response, http_status_code
Example #3
    def process_request(cls, params_dict):
        tags = [t for t in params_dict[cls.tags_parameter] if t]
        report_uuid = params_dict[cls.report_uuid_parameter][0]

        http_status_code = 200
        json_response = {RUN_REPORT_UUID: report_uuid, TAGS: tags}

        try:
            cls._DB_CONNECTOR.update(RUN_REPORT_COLLECTION,
                                     {UUID: report_uuid},
                                     {'$addToSet': {
                                         TAGS: {
                                             '$each': tags
                                         }
                                     }})
            APP_LOGGER.info("Updated run report uuid=%s with tags %s." %
                            (report_uuid, tags))

            json_response[STATUS] = SUCCEEDED
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[STATUS] = FAILED
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500

        return make_clean_response(json_response, http_status_code)
Example #4
 def process_callback(future):
     try:
         _ = future.result()
         update = { "$set": {
                              STATUS: JOB_STATUS.succeeded, # @UndefinedVariable
                              RESULT: outfile_path,
                              FINISH_DATESTAMP: datetime.today(),
                              URL: get_results_url(outfile_path),
                            }
                 }
         # If job has been deleted, then delete result and don't update DB.
         if len(db_connector.find(PA_CONVERT_IMAGES_COLLECTION, query, {})) > 0:
             db_connector.update(PA_CONVERT_IMAGES_COLLECTION, query, update)
         else:
             silently_remove_file(outfile_path)
     except:
         APP_LOGGER.exception(traceback.format_exc())
         error_msg = str(sys.exc_info()[1])
         update    = { "$set": {STATUS: JOB_STATUS.failed, # @UndefinedVariable
                                RESULT: None,
                                FINISH_DATESTAMP: datetime.today(),
                                ERROR: error_msg}}
         # If job has been deleted, then delete result and don't update DB.
         if len(db_connector.find(PA_CONVERT_IMAGES_COLLECTION, query, {})) > 0:
             db_connector.update(PA_CONVERT_IMAGES_COLLECTION, query, update)
         else:
             silently_remove_file(outfile_path)
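Example #5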
    def process_request(cls, params_dict):
        metadata_file = params_dict[cls._file_param][0]
        application = params_dict[cls._app_param][0]
        json_response = {FILENAME: metadata_file.filename}
        http_status_code = 200
        file_uuid = str(uuid4())
        path = os.path.join(TMP_PATH, file_uuid)

        try:
            metadata_file.save(path)
            metadata_file.close()
            dialect = get_dialect(path)
            if dialect:
                probe_ids = cls._DB_CONNECTOR.distinct(
                    PROBE_METADATA_COLLECTION, PROBE_ID)
                ids_are_unique = cls.update_db(dialect, path, probe_ids,
                                               application)
                if not ids_are_unique:
                    http_status_code = 403
            else:
                http_status_code = 415
                json_response[ERROR] = "Invalid file format - file must " \
                    "be either tab or comma delimited."
        except IOError:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 415
            json_response[ERROR] = str(sys.exc_info()[1])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 500
            json_response[ERROR] = str(sys.exc_info()[1])
        finally:
            silently_remove_file(path)

        return make_clean_response(json_response, http_status_code)
Example #6
    def set_defaults(self):
        """
        There are certain parameters that the user may not have sent
        but that can come from the experiment definition; set them here.

        Set workflow based on experiment type. The first 3 stages in each workflow are
        primary analysis, identity, and assay caller. The 4th stage depends on the
        type of experiment, i.e., genotyper API for hotspot experiment, exploratory API
        for exploratory experiment, and sequencing API for sequencing experiment.
        """
        try:
            exp_def_fetcher = ExpDefHandler()
            experiment = exp_def_fetcher.get_experiment_definition(
                self.parameters[EXP_DEF])

            self.exp_type = experiment.exp_type
            self.workflow = [PROCESS, IDENTITY, ASSAY_CALLER
                             ] + [WORKFLOW_LOOKUP[self.exp_type]]
            self.document_list = [PA_DOCUMENT, ID_DOCUMENT, AC_DOCUMENT] + \
                                 [DOCUMENT_LOOKUP[self.exp_type]]

            if DYES not in self.parameters or \
               DYE_LEVELS not in self.parameters or \
               NUM_PROBES not in self.parameters or \
               PICO1_DYE not in self.parameters:
                # get dyes and number of levels
                dye_levels = defaultdict(int)
                for barcode in experiment.barcodes:
                    for dye_name, lvl in barcode.dye_levels.items():
                        dye_levels[dye_name] = max(dye_levels[dye_name],
                                                   int(lvl + 1))
                if DYES not in self.parameters:
                    self.parameters[DYES] = dye_levels.keys()
                if DYE_LEVELS not in self.parameters:
                    self.parameters[DYE_LEVELS] = dye_levels.items()
                if NUM_PROBES not in self.parameters:
                    self.parameters[NUM_PROBES] = len(experiment.barcodes)
                if PICO1_DYE not in self.parameters:
                    self.parameters[PICO1_DYE] = None
        except:
            APP_LOGGER.exception(traceback.format_exc())

        # set parameters for anything user might not have set
        if FILTERED_DYES not in self.parameters:
            self.parameters[FILTERED_DYES] = list()

        if IGNORED_DYES not in self.parameters:
            self.parameters[IGNORED_DYES] = list()

        if CONTINUOUS_PHASE not in self.parameters:
            self.parameters[CONTINUOUS_PHASE] = False

        if DEV_MODE not in self.parameters:
            self.parameters[DEV_MODE] = DEFAULT_DEV_MODE

        if DRIFT_COMPENSATE not in self.parameters:
            self.parameters[DRIFT_COMPENSATE] = DEFAULT_DRIFT_COMPENSATE
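The dye-level block in the middle of set_defaults folds each barcode's level indexes into a per-dye level count: a dye observed at level index lvl implies at least lvl + 1 distinct levels. A standalone sketch of that fold, with hypothetical barcode data:

from collections import defaultdict

# Hypothetical dye levels per barcode: {dye name: level index}.
barcode_dye_levels = [{'FAM': 0, 'HEX': 2}, {'FAM': 3, 'HEX': 1}]

dye_levels = defaultdict(int)
for barcode in barcode_dye_levels:
    for dye_name, lvl in barcode.items():
        # Seeing level index lvl implies at least lvl + 1 levels exist.
        dye_levels[dye_name] = max(dye_levels[dye_name], int(lvl + 1))

print(dict(dye_levels))  # FAM -> 4 levels, HEX -> 3 levels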
Example #7
    def process_request(cls, params_dict):
        job_uuids = params_dict[cls.job_uuid_param]
        job_name = params_dict[cls.job_name_param][0]
        exp_def_name = params_dict[cls.exp_defs_param][0]
        required_drops = params_dict[cls.req_drops_param][0]

        json_response = {GENOTYPER: []}
        status_codes = list()
        for i, assay_caller_uuid in enumerate(job_uuids):
            if len(job_uuids) == 1:
                cur_job_name = job_name
            else:
                cur_job_name = "%s-%d" % (job_name, i)
            status_code = 200

            if cur_job_name in cls._DB_CONNECTOR.distinct(
                    SA_GENOTYPER_COLLECTION, JOB_NAME):
                status_code = 403
                json_response[GENOTYPER].append({ERROR: 'Job exists.'})
            else:
                try:
                    # Create helper functions
                    genotyper_callable = SaGenotyperCallable(
                        assay_caller_uuid, exp_def_name, required_drops,
                        cls._DB_CONNECTOR, cur_job_name)
                    response = copy.deepcopy(genotyper_callable.document)
                    callback = make_process_callback(
                        genotyper_callable.uuid, exp_def_name,
                        genotyper_callable.ac_result_path,
                        genotyper_callable.ignored_dyes,
                        genotyper_callable.outfile_path, cls._DB_CONNECTOR,
                        cur_job_name)

                    # Add to queue
                    cls._EXECUTION_MANAGER.add_job(response[UUID],
                                                   genotyper_callable,
                                                   callback)

                except:
                    APP_LOGGER.exception(
                        "Error processing Genotyper post request.")
                    response = {
                        JOB_NAME: cur_job_name,
                        ERROR: str(sys.exc_info()[1])
                    }
                    status_code = 500
                finally:
                    if ID in response:
                        del response[ID]
                    json_response[GENOTYPER].append(response)

            status_codes.append(status_code)

        # If all jobs submitted successfully, then 200 should be returned.
        # Otherwise, the maximum status code seems good enough.
        return make_clean_response(json_response, max(status_codes))
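Every submit handler in these examples follows the same contract: _EXECUTION_MANAGER.add_job runs the callable and later invokes the callback with a future, and calling future.result() inside the callback re-raises whatever the callable raised. The contract can be reproduced with the standard concurrent.futures module (a backport package on Python 2):

from concurrent.futures import ThreadPoolExecutor

def job():
    # Stand-in for a callable such as SaGenotyperCallable.
    return 'outfile.path'

def callback(future):
    try:
        result = future.result()  # re-raises if job() raised
        print('succeeded: %s' % result)
    except Exception:
        print('failed')

executor = ThreadPoolExecutor(max_workers=1)
future = executor.submit(job)
future.add_done_callback(callback)
executor.shutdown()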
Example #8
 def process_request(cls, params_dict):
     try:
         valid_files = [
             fp for fp in os.listdir(MODIFIED_ARCHIVES_PATH)
             if allowed_file(os.path.join(MODIFIED_ARCHIVES_PATH, fp))
         ]
         return (valid_files, [], None)
     except:
         APP_LOGGER.exception(traceback.format_exc())
         return ([{ERROR: str(sys.exc_info()[1])}], [ERROR], None)
Example #9
 def process_request(cls, params_dict):
     try:
         reports = cls._DB_CONNECTOR.find(RUN_REPORT_COLLECTION,
                                          {TAGS: {
                                              '$exists': True
                                          }})
         user_tags = set(t for r in reports for t in r[TAGS])
         return (list(user_tags), [], None)
     except:
         APP_LOGGER.exception(traceback.format_exc())
         return ([{ERROR: str(sys.exc_info()[1])}], [ERROR], None)
Example #10
        def gen_dye_scatterplot(dyes, sys_listener_path):
            try:
                analysis_df = pandas.read_table(self.analysis_file,
                                                sep=sniff_delimiter(
                                                    self.analysis_file))
                ac_df = pandas.read_table(self.tmp_outfile_path,
                                          sep=sniff_delimiter(
                                              self.tmp_outfile_path))
                analysis_df['assay'] = False
                analysis_df.loc[analysis_df['identity'].notnull(),
                                'assay'] = ac_df['assay'].values

                # System listener inputs
                dyn_align_offsets = {}
                temps = {}
                steps = {}
                if sys_listener_path is not None:
                    sys_listener_dir = os.path.dirname(sys_listener_path)
                    clamp_temp_tp = ClampTempTopicParser()
                    old_channel_offset_tp = OldChannelOffsetTopicParser()
                    channel_offset_tp = ChannelOffsetTopicParser()
                    dyn_align_steps_tp = DynamicAlignStepsParser()
                    topic_parsers = [
                        clamp_temp_tp, old_channel_offset_tp,
                        channel_offset_tp, dyn_align_steps_tp
                    ]
                    sys_listener_parser = SystemListenerParser(
                        sys_listener_dir, topic_parsers=topic_parsers)
                    temps = sys_listener_parser.get_topic_results(
                        clamp_temp_tp.topic)
                    dyn_align_offsets = sys_listener_parser.get_topic_results(
                        channel_offset_tp.topic)
                    if len(dyn_align_offsets) < 1:
                        APP_LOGGER.info("Using old channel offset parser...")
                        dyn_align_offsets = sys_listener_parser.get_topic_results(
                            old_channel_offset_tp.topic)
                    else:
                        APP_LOGGER.info("Using new channel offset parser...")
                    steps = sys_listener_parser.get_topic_results(
                        dyn_align_steps_tp.topic)

                generate_dye_scatterplots(analysis_df,
                                          dyes,
                                          self.tmp_dyes_plot_path,
                                          self.job_name,
                                          self.pico1_dye,
                                          dyn_align_offsets=dyn_align_offsets,
                                          temps=temps,
                                          steps=steps)
                shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
                APP_LOGGER.info("Dyes scatter plot generated for %s." % \
                    self.job_name)
            except:
                APP_LOGGER.exception("Dyes scatter plot generation failed.")
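Example #11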
    def _generate(self, ndyes, nchoose=5):
        """
        @param ndyes:   Integer, number of dyes to use per solution
        @param nchoose: Integer, maximum number of combinations that will be further optimized
        """
        # check whether the minimum/maximum dye levels can produce the requested number of barcodes
        min_nbarcodes = numpy.product(self._barcode_min_nlvls[numpy.argsort(self._barcode_min_nlvls)[:ndyes]])
        max_nbarcodes = numpy.product(self._barcode_max_nlvls[numpy.argsort(self._barcode_max_nlvls)[-ndyes:]])

        # too many dyes were selected
        if min_nbarcodes > self._requested_nbarcodes:
            APP_LOGGER.info('Cannot generate requested number of barcodes (%d).  '
                            'Smallest library would have %d barcodes.' %
                            (self._requested_nbarcodes, min_nbarcodes))
            return

        # too few dyes were selected
        if max_nbarcodes < self._requested_nbarcodes:
            APP_LOGGER.info('Cannot generate requested number of barcodes (%d).  '
                            'Largest library would have %d barcodes.' %
                            (self._requested_nbarcodes, max_nbarcodes))
            return

        # find the optimal number of levels for each dye combination
        requested_dye_idxs = set(range(len(self._requested_dye_lots)))
        optimal_nlvls = list()
        for dye_idxs in itertools.combinations(xrange(len(self._barcode_profiles)), ndyes):
            dye_idxs = numpy.array(dye_idxs)

            # ignore combinations that do not include requested dyes
            if self.need_additional_db_dyes and \
                    self._requested_dye_lots and \
                    not requested_dye_idxs.issubset(dye_idxs):
                continue

            # ignore combinations in which the peaks are too close
            peaks = numpy.concatenate((self._barcode_peaks[dye_idxs], self._non_barcode_peaks))
            if numpy.any(numpy.diff(numpy.sort(peaks)) < self._min_peak_difference):
                continue

            try:
                candidate_nlvls, candidate_lowest_peak = self._calc_optimal_nlvls(dye_idxs)
                optimal_nlvls.append((candidate_lowest_peak, dye_idxs, candidate_nlvls))
            except Exception as e:
                APP_LOGGER.exception(e)

        optimal_nlvls.sort(key=lambda x: x[0])

        for _, dye_idxs, nlvls in optimal_nlvls[: nchoose]:
            try:
                self._make_design(nlvls, dye_idxs)
            except Exception as e:
                APP_LOGGER.exception(e)
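The two early returns above bound what any combination of ndyes dyes could produce: the smallest possible library multiplies the ndyes smallest minimum level counts, and the largest multiplies the ndyes largest maximum level counts. A worked sketch with hypothetical level bounds:

import numpy

# Hypothetical per-dye level bounds.
barcode_min_nlvls = numpy.array([2, 2, 3, 4])
barcode_max_nlvls = numpy.array([4, 5, 6, 8])
ndyes = 2

# Smallest library: product of the two smallest minimums, 2 * 2 = 4.
min_nbarcodes = numpy.product(numpy.sort(barcode_min_nlvls)[:ndyes])
# Largest library: product of the two largest maximums, 6 * 8 = 48.
max_nbarcodes = numpy.product(numpy.sort(barcode_max_nlvls)[-ndyes:])

# Any requested size outside [4, 48] is rejected before combinations are tried.
print('%d %d' % (min_nbarcodes, max_nbarcodes))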
Example #12
    def _combine_sa(self, output_path, id_report_path, gt_png_path,
                    gt_png_sum_path, gt_kde_path, gt_kde_sum_path):
        """
        Combine Identity report, Assay Caller scatter plot, and Genotyper PNG

        @param output_path:             pathname of the combined output PDF
        @param id_report_path:          pathname of identity report
        @param gt_png_path:             pathname of genotyper scatter PNG
        @param gt_png_sum_path:         pathname of genotyper scatter sum PNG
        @param gt_kde_path:             pathname of genotyper KDE PNG
        @param gt_kde_sum_path:         pathname of genotyper KDE sum PNG
        """
        try:
            path = output_path + '_png_id'
            doc = SimpleDocTemplate(path, pagesize=landscape(letter))
            story = list()

            story.append(self.get_image(gt_png_sum_path))
            story.append(PageBreak())

            story.append(self.get_image(gt_kde_sum_path))
            story.append(PageBreak())

            styles = getSampleStyleSheet()
            id_title = Paragraph('Identity Report', styles['h2'])
            story.append(id_title)
            story.append(Spacer(1, 0.2 * inch))

            with open(id_report_path, 'r') as id_report:
                lines = id_report.readlines()
                for line in lines:
                    styles = getSampleStyleSheet()
                    left_indent = (len(line) - len(line.lstrip())) * 5
                    styles.add(
                        ParagraphStyle(name='custom_style',
                                       fontName=FONT_NAME_STD,
                                       fontSize=FONT_SIZE,
                                       leftIndent=left_indent))
                    p = Paragraph(line, styles['custom_style'])
                    story.append(p)
                story.append(PageBreak())

            doc.build(story,
                      onFirstPage=self.standard_page,
                      onLaterPages=self.standard_page)

            self._merge_pdfs(output_path, gt_png_path, gt_kde_path, path)

            os.unlink(path)
            return True
        except:
            APP_LOGGER.exception(traceback.format_exc())
            return False
Example #13
    def process_request(cls, params_dict):
        probes_file_uuid = params_dict[cls._probes_param][0]
        targets_file_uuid = params_dict[cls._targets_param][0]
        strict = params_dict[cls._strict_param][0]
        job_name = params_dict[cls._job_name_param][0]

        json_response = {
            PROBES: probes_file_uuid,
            TARGETS: targets_file_uuid,
            STRICT: strict,
            UUID: str(uuid4()),
            STATUS: JOB_STATUS.submitted,  # @UndefinedVariable
            JOB_NAME: job_name,
            JOB_TYPE_NAME: JOB_TYPE.absorption,  # @UndefinedVariable
            SUBMIT_DATESTAMP: datetime.today(),
        }
        http_status_code = 200

        if job_name in cls._DB_CONNECTOR.distinct(ABSORPTION_COLLECTION,
                                                  JOB_NAME):
            http_status_code = 403
        else:
            try:
                probes_path = cls._DB_CONNECTOR.find_one(
                    PROBES_COLLECTION, UUID, probes_file_uuid)[FILEPATH]
                targets_path = cls._DB_CONNECTOR.find_one(
                    TARGETS_COLLECTION, UUID, targets_file_uuid)[FILEPATH]
                outfile_path = os.path.join(RESULTS_PATH, json_response[UUID])

                # Create helper functions
                abs_callable = AbsorbtionCallable(targets_path, probes_path,
                                                  strict, outfile_path,
                                                  json_response[UUID],
                                                  cls._DB_CONNECTOR)
                callback = make_absorption_callback(json_response[UUID],
                                                    outfile_path,
                                                    cls._DB_CONNECTOR)
                # Add to queue and update DB
                cls._DB_CONNECTOR.insert(ABSORPTION_COLLECTION,
                                         [json_response])
                cls._EXECUTION_MANAGER.add_job(json_response[UUID],
                                               abs_callable, callback)
            except:
                APP_LOGGER.exception(traceback.format_exc())
                json_response[ERROR] = str(sys.exc_info()[1])
                http_status_code = 500
            finally:
                if ID in json_response:
                    del json_response[ID]

        return make_clean_response(json_response, http_status_code)
Example #14
    def process_request(cls, params_dict):
        probes_file_uuid = params_dict[ParameterFactory.file_uuid(
            "probes", PROBES_COLLECTION)][0]
        targets_file_uuid = params_dict[ParameterFactory.file_uuid(
            "targets", TARGETS_COLLECTION)][0]
        absorb = params_dict[ParameterFactory.boolean(
            "absorb", "Check for absorbed probes.")][0]
        num = params_dict[ParameterFactory.integer(
            "num",
            "Minimum number of probes for a target.",
            default=3,
            minimum=1)][0]
        job_name = params_dict[ParameterFactory.lc_string(
            JOB_NAME, "Unique name to give this job.")][0]

        json_response = {
            PROBES: probes_file_uuid,
            TARGETS: targets_file_uuid,
            ABSORB: absorb,
            NUM: num,
            UUID: str(uuid4()),
            STATUS: JOB_STATUS.submitted,  # @UndefinedVariable
            JOB_NAME: job_name,
            DATESTAMP: datetime.today(),
        }
        http_status_code = 200

        if job_name in cls._DB_CONNECTOR.distinct(VALIDATION_COLLECTION,
                                                  JOB_NAME):
            http_status_code = 403
        else:
            try:
                probes_path = cls._DB_CONNECTOR.find_one(
                    PROBES_COLLECTION, UUID, probes_file_uuid)[FILEPATH]
                targets_path = cls._DB_CONNECTOR.find_one(
                    TARGETS_COLLECTION, UUID, targets_file_uuid)[FILEPATH]

                #ADD VALIDATOR JOB TO QUEUE

                cls._DB_CONNECTOR.insert(VALIDATION_COLLECTION,
                                         [json_response])
                del json_response[ID]
            except:
                APP_LOGGER.exception(traceback.format_exc())
                json_response[ERROR] = str(sys.exc_info()[1])
                http_status_code = 500

        return make_clean_response(json_response, http_status_code)
Example #15
def get_hdf5_datasets(log_data, data_folder):
    """
    Fetch the HDF5 datasets associated with a run report.

    @param log_data:            the parsed run report YAML document
    @param data_folder:         folder where the data files are located
    """
    if log_data is None or RUN_ID not in log_data: return set()

    run_id = log_data[RUN_ID]
    hdf5_paths = [
        os.path.join(data_folder, f + '.h5')
        for f in [run_id, run_id + '-baseline']
        if os.path.isfile(os.path.join(data_folder, f + '.h5'))
    ]
    all_datasets = set()

    for path in hdf5_paths:
        exist_records = _DB_CONNECTOR.find(
            HDF5_COLLECTION, {HDF5_PATH: remove_disk_directory(path)})
        if exist_records:
            all_datasets.update(set(r[HDF5_DATASET] for r in exist_records))
            continue

        new_records = list()
        try:
            with h5py.File(path, 'r') as h5_file:
                dataset_names = h5_file.keys()
            for dsname in dataset_names:
                if re.match(r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}', dsname):
                    new_records.append({
                        HDF5_PATH: remove_disk_directory(path),
                        HDF5_DATASET: dsname,
                    })
        except:
            APP_LOGGER.exception(
                'Unable to get dataset information from HDF5 file: %s' % path)

        if new_records:
            APP_LOGGER.info('Found %d datasets from HDF5 file: %s' %
                            (len(new_records), path))
            _DB_CONNECTOR.insert(HDF5_COLLECTION, new_records)
            all_datasets.update(set(r[HDF5_DATASET] for r in new_records))

    return all_datasets
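The re.match filter above only admits dataset names that begin with a date/time stamp of the form YYYY-MM-DD_HHMM.SS, for example (hypothetical names):

import re

pattern = r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}'
print(bool(re.match(pattern, '2017-05-31_1404.56')))       # True
print(bool(re.match(pattern, 'laser_power_calibration')))  # False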
Example #16
 def process_callback(future):
     try:
         _ = future.result()
         update = {
             '$set': {
                 STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                 RESULT: outfile_path,
                 URL: get_results_url(outfile_path),
                 SCATTER_PLOT: scatter_plot_path,
                 SCATTER_PLOT_URL: get_results_url(scatter_plot_path),
                 DYES_SCATTER_PLOT: dyes_scatter_plot_path,
                 DYES_SCATTER_PLOT_URL:
                 get_results_url(dyes_scatter_plot_path),
                 FINISH_DATESTAMP: datetime.today(),
             }
         }
         # If job has been deleted, then delete result and don't update DB.
         if len(db_connector.find(SA_ASSAY_CALLER_COLLECTION, query,
                                  {})) > 0:
             db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)
         else:
             silently_remove_file(outfile_path)
             silently_remove_file(scatter_plot_path)
             silently_remove_file(dyes_scatter_plot_path)
     except:
         APP_LOGGER.exception(traceback.format_exc())
         error_msg = str(sys.exc_info()[1])
         update = {
             '$set': {
                 STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                 RESULT: None,
                 FINISH_DATESTAMP: datetime.today(),
                 ERROR: error_msg
             }
         }
         # If job has been deleted, then delete result and don't update DB.
         if len(db_connector.find(SA_ASSAY_CALLER_COLLECTION, query,
                                  {})) > 0:
             db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)
         else:
             silently_remove_file(outfile_path)
             silently_remove_file(scatter_plot_path)
             silently_remove_file(dyes_scatter_plot_path)
Example #17
    def process_callback(future):
        try:
            _ = future.result()
            report_errors = check_report_for_errors(report_path)
            update_data = { STATUS: JOB_STATUS.succeeded,
                            RESULT: outfile_path,
                            URL: get_results_url(outfile_path),
                            PLOT: plot_path,
                            REPORT: report_path,
                            PLOT_URL: get_results_url(plot_path),
                            REPORT_URL: get_results_url(report_path),
                            PLATE_PLOT_URL: get_results_url(plate_plot_path),
                            TEMPORAL_PLOT_URL: get_results_url(temporal_plot_path),
                            DROP_COUNT_PLOT_URL: get_results_url(drop_count_plot_path),
                            FINISH_DATESTAMP: datetime.today()}
            if report_errors:
                update_data[ERROR] = ' '.join(report_errors)

            update = {"$set": update_data}
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_IDENTITY_COLLECTION, query, {})) > 0:
                db_connector.update(SA_IDENTITY_COLLECTION, query, update)
            else:
                silently_remove_file(report_path)
                silently_remove_file(outfile_path)
                silently_remove_file(plot_path)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            error_msg = str(sys.exc_info()[1])

            update    = { "$set": {STATUS: JOB_STATUS.failed, # @UndefinedVariable
                                   RESULT: None,
                                   FINISH_DATESTAMP: datetime.today(),
                                   ERROR: error_msg}}
            if os.path.isfile(report_path):
                update['$set'][REPORT_URL] = get_results_url(report_path)
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_IDENTITY_COLLECTION, query, {})) > 0:
                db_connector.update(SA_IDENTITY_COLLECTION, query, update)
            else:
                silently_remove_file(report_path)
                silently_remove_file(outfile_path)
                silently_remove_file(plot_path)
Example #18
    def process_request(cls, params_dict):

        dye_metrics = params_dict[cls._dyes_metrics]
        drop_ave = params_dict[cls._drop_ave_diameter][0]
        drop_std = params_dict[cls._drop_std_diameter][0]
        http_status_code = 200
        uuid = str(uuid4())
        json_response = {
            UUID: uuid,
            DATESTAMP: datetime.today(),
        }
        try:
            dye_names = list()
            nlvls = list()
            intensities = list()
            for dye_name, nlvl, low, high in dye_metrics:
                dye_names.append(dye_name)
                nlvls.append(nlvl)
                intensities.append((low, high))

            centroids = make_centroids(nlvls, intensities)
            clusters = make_clusters(centroids,
                                     drop_ave=drop_ave,
                                     drop_std=drop_std)
            collisions = check_collision(clusters)

            json_response[DROP_AVE_DIAMETER] = drop_ave
            json_response[DROP_STD_DIAMETER] = drop_std
            json_response[DYE_METRICS] = map(list, dye_metrics)
            json_response['collisions'] = collisions
            json_response['nclusters'] = numpy.product(nlvls)

        except IOError:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 415
            json_response[ERROR] = str(sys.exc_info()[1])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 500
            json_response[ERROR] = str(sys.exc_info()[1])

        return make_clean_response(json_response, http_status_code)
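Example #19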
    def _calc_dye_max_intensities(self, dye_idxs, nlvls, resolution=100.0):
        """
        The ideal library will take full advantage of our intensity space, which
        peaks at 65535 intensity units.  This function attempts to optimize the
        maximum level of each dye by recomposing the profiles and testing that
        they do not saturate.

        @param dye_idxs:    1D numpy array of the indexes of the barcode dyes
        @param nlvls:       1D numpy array of the number of levels for each dye
        @param resolution:  Float, intensity unit spacing, i.e. resolution of 100.0
                            would result in intensities of: 1000.0, 1100.00, 1200.0...
        @return:            1D numpy array of maximum intensities for each dye.
        """
        dye_max_intensities = None
        # test various percent cutoffs
        for percent_best in numpy.arange(2.5, 25, 2.5):
            try:
                # make a group of scalars for each dye (dimension)
                scalars = [numpy.linspace(10000.0, MAX_INTEN, resolution).reshape(-1, 1) for _ in dye_idxs]
                # create barcode profiles by summing each combination of dyes profiles
                # to find an optimal max barcode profile
                scalar_combos = scalars.pop(0)
                while scalars:
                    scalar_combos = numpy.hstack((
                        numpy.repeat(scalar_combos, resolution, axis=0),
                        numpy.tile(scalars.pop(0), (len(scalar_combos), 1))
                    ))
                    scalar_combos = self._rm_saturated(scalar_combos, dye_idxs)
                    scalar_combos = self._rm_most_variable(scalar_combos, percent_best, nlvls)

                midx = numpy.argmax(numpy.sum(scalar_combos, axis=1))

                dye_max_intensities = scalar_combos[midx]
                break
            except Exception as e:
                APP_LOGGER.exception(e)

        if dye_max_intensities is None or len(dye_max_intensities) != len(dye_idxs):
            raise Exception('A library cannot be generated from this combination of dyes.')

        return dye_max_intensities
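The hstack of numpy.repeat and numpy.tile in the loop above grows the cartesian product of the per-dye scalar grids one dimension at a time, pruning saturated and overly variable rows between expansions. The expansion step in isolation, with a coarse resolution of 3 (65535 is the intensity ceiling named in the docstring):

import numpy

resolution = 3
# Two dyes, each with a coarse grid of candidate maximum intensities.
scalars = [numpy.linspace(10000.0, 65535.0, resolution).reshape(-1, 1)
           for _ in range(2)]

scalar_combos = scalars.pop(0)                        # shape (3, 1)
scalar_combos = numpy.hstack((
    numpy.repeat(scalar_combos, resolution, axis=0),  # each row repeated 3x
    numpy.tile(scalars.pop(0), (3, 1)),               # whole grid cycled 3x
))
print(scalar_combos.shape)  # (9, 2): every pairing of the two grids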
Example #20
    def process_request(cls, params_dict):
        targets_file = params_dict[ParameterFactory.file(
            "Targets FASTA file.")][0]
        json_response = {FILENAME: targets_file.filename}
        http_status_code = 200
        file_uuid = str(uuid4())

        path = os.path.join(TARGETS_UPLOAD_PATH, file_uuid)
        existing_filenames = cls._DB_CONNECTOR.distinct(
            TARGETS_COLLECTION, FILENAME)
        if os.path.exists(path) or targets_file.filename in existing_filenames:
            http_status_code = 403
        elif not validate_fasta(targets_file):
            http_status_code = 415
        else:
            try:
                targets_file.save(path)
                targets_file.close()
                json_response[URL] = "http://%s/uploads/%s/targets/%s" % (
                    HOSTNAME, PORT, file_uuid)
                json_response[FILEPATH] = path
                json_response[UUID] = file_uuid
                json_response[DATESTAMP] = datetime.today()
                json_response[TYPE] = "targets"
                if "." in targets_file.filename:
                    json_response[FORMAT] = targets_file.filename.split(
                        ".")[-1]
                else:
                    json_response[FORMAT] = "Unknown"

                cls._DB_CONNECTOR.insert(TARGETS_COLLECTION, [json_response])
            except:
                APP_LOGGER.exception(traceback.format_exc())
                json_response[ERROR] = str(sys.exc_info()[1])
                http_status_code = 500
            finally:
                if ID in json_response:
                    del json_response[ID]

        return make_clean_response(json_response, http_status_code)
Example #21
    def process_callback(future):
        try:
            _ = future.result()

            update = { "$set": {
                                 STATUS: JOB_STATUS.succeeded, # @UndefinedVariable
                                 RESULT: outfile_path,
                                 URL: get_results_url(os.path.join(dirname, uuid)),
                                 PNG: os.path.join(dirname, scatter_ind_pdf_fn),
                                 PNG_URL: get_results_url(os.path.join(dirname, scatter_ind_pdf_fn)),
                                 PNG_SUM: os.path.join(dirname, scatter_png_fn),
                                 PNG_SUM_URL: get_results_url(os.path.join(dirname, scatter_png_fn)),
                                 KDE_PNG: os.path.join(dirname, kde_ind_pdf_fn),
                                 KDE_PNG_URL: get_results_url(os.path.join(dirname, kde_ind_pdf_fn)),
                                 KDE_PNG_SUM: os.path.join(dirname, kde_png_fn),
                                 KDE_PNG_SUM_URL: get_results_url(os.path.join(dirname, kde_png_fn)),
                                 FINISH_DATESTAMP: datetime.today(),
                               }
                    }
        except:
            APP_LOGGER.exception("Error in Exploratory post request process callback.")
            error_msg = str(sys.exc_info()[1])
            update    = { "$set": {STATUS: JOB_STATUS.failed, # @UndefinedVariable
                                   RESULT: None,
                                   PDF: None,
                                   PNG: None,
                                   PNG_SUM: None,
                                   FINISH_DATESTAMP: datetime.today(),
                                   ERROR: error_msg}}
        finally:
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_EXPLORATORY_COLLECTION, query, {})) > 0:
                db_connector.update(SA_EXPLORATORY_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
                silently_remove_file(os.path.join(dirname, scatter_png_fn))
                silently_remove_file(os.path.join(dirname, scatter_ind_pdf_fn))
                silently_remove_file(os.path.join(dirname, kde_png_fn))
                silently_remove_file(os.path.join(dirname, kde_ind_pdf_fn))
Example #22
    def process_request(cls, params_dict):
        tag = params_dict[cls.tag_parameter][0]
        report_uuid = params_dict[cls.report_uuid_parameter][0]

        http_status_code = 200
        json_response = {RUN_REPORT_UUID: report_uuid, TAGS: [tag]}

        try:
            cls._DB_CONNECTOR.update(RUN_REPORT_COLLECTION,
                                     {UUID: report_uuid},
                                     {'$pull': {
                                         TAGS: tag
                                     }})
            json_response[STATUS] = SUCCEEDED
            APP_LOGGER.info("Removed tag name=%s from run report uuid=%s" %
                            (tag, report_uuid))
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            json_response[STATUS] = FAILED
            http_status_code = 500

        return json_response, http_status_code
Example #23
def get_datasets_from_files(filepaths):
    """
    Given the paths of HDF5/image stack files, return a tuple of a dictionary and a boolean.
    The dictionary maps each filepath to the set of dataset names it contains. The
    boolean indicates whether any dataset name is a duplicate, either of one already
    in the database or of one found in another file.

    @param filepaths:           list of HDF5/image stack file paths
    """
    if not filepaths: return dict(), False

    all_exist_datasets = _DB_CONNECTOR.distinct(HDF5_COLLECTION, HDF5_DATASET)
    fp_to_datasets = defaultdict(set)
    duplicate = False
    for fp in filepaths:
        if fp.lower().endswith('.h5'):
            try:
                with h5py.File(fp, 'r') as h5_file:
                    dataset_names = h5_file.keys()
                for dsname in dataset_names:
                    if not dsname.lower().startswith("laser_power"):
                        if dsname not in all_exist_datasets:
                            fp_to_datasets[fp].add(dsname)
                        else:
                            duplicate = True
            except:
                APP_LOGGER.exception(
                    'Unable to get dataset information from HDF5 file: %s' %
                    fp)

    # check if there are duplicate datasets in fp_to_datasets
    unique_datasets = set()
    for datasets in fp_to_datasets.values():
        unique_datasets = unique_datasets | datasets
    if len(unique_datasets) < sum(len(d) for d in fp_to_datasets.values()):
        duplicate = True
    return fp_to_datasets, duplicate
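The final duplicate check compares the union of the per-file dataset sets against the total number of entries; any name shared by two files makes the union strictly smaller. A minimal illustration with hypothetical names:

fp_to_datasets = {'a.h5': set(['ds1', 'ds2']), 'b.h5': set(['ds2', 'ds3'])}

unique_datasets = set()
for datasets in fp_to_datasets.values():
    unique_datasets = unique_datasets | datasets

# 3 unique names < 4 total entries, so 'ds2' must appear in both files.
print(len(unique_datasets) < sum(len(d) for d in fp_to_datasets.values()))  # True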
Example #24
    def process_request(cls, params_dict):
        replay_stack_name = params_dict[cls._name_param][0]
        ham_stack_name = params_dict[cls._ham_imgs_param][0]
        mon1_stack_name = params_dict[cls._mon1_imgs_param][0]
        mon2_stack_name = params_dict[cls._mon2_imgs_param][0]
        short_desc = params_dict[cls._short_desc_param][0]
        http_status_code = 200
        uuid = str(uuid4())
        json_response = {DATESTAMP: datetime.today()}

        try:
            # check for existing stacks with the same names
            existing_replay_stacks = cls._DB_CONNECTOR.find(
                IMAGES_COLLECTION, {
                    NAME: replay_stack_name,
                    STACK_TYPE: REPLAY
                }, [NAME, RESULT])

            existing_ham_stacks = cls._DB_CONNECTOR.find(
                IMAGES_COLLECTION, {
                    NAME: ham_stack_name,
                    STACK_TYPE: HAM
                }, [RESULT])

            existing_mon1_stacks = cls._DB_CONNECTOR.find(
                IMAGES_COLLECTION, {
                    NAME: mon1_stack_name,
                    STACK_TYPE: MONITOR1
                }, [RESULT])

            existing_mon2_stacks = cls._DB_CONNECTOR.find(
                IMAGES_COLLECTION, {
                    NAME: mon2_stack_name,
                    STACK_TYPE: MONITOR2
                }, [RESULT])

            similar_replay_stacks = cls._DB_CONNECTOR.find(
                IMAGES_COLLECTION, {
                    HAM_NAME: ham_stack_name,
                    MON1_NAME: mon1_stack_name,
                    MON2_NAME: mon2_stack_name,
                    STACK_TYPE: REPLAY
                }, [NAME, RESULT])
            # verify replay stack name is unique
            if existing_replay_stacks:
                http_status_code = 403
                json_response[ERROR] = 'Replay image stack with given name already ' \
                            'exists.'
            # check if similar replay stack already exists
            elif similar_replay_stacks:
                similar_name = similar_replay_stacks[0][NAME]
                http_status_code = 403
                json_response[
                    ERROR] = 'Similar replay stack named "%s" already exists.' % similar_name
            # if no similar stack exists enter it into the database
            else:
                tmp_path = ''
                try:
                    # temporary path for taring, untaring, etc...
                    tmp_path = tempfile.mkdtemp()

                    # create a replay directory
                    replay_dir_path = os.path.join(tmp_path, 'replay')
                    os.mkdir(replay_dir_path)

                    # make readme
                    readme_file_name = 'README'
                    readme_path = os.path.join(replay_dir_path,
                                               readme_file_name)
                    readme_str = '\n'.join([
                        replay_stack_name, ham_stack_name, mon1_stack_name,
                        mon2_stack_name, short_desc
                    ])
                    with open(readme_path, 'w') as fh:
                        fh.write(readme_str)

                    # create new tar file
                    new_tf_name = uuid + '.tar.gz'
                    new_tf_path = os.path.join(tmp_path, new_tf_name)
                    new_tf = tarfile.open(new_tf_path, 'w:gz')

                    # add readme and images
                    extract_imgs(existing_ham_stacks[0][RESULT],
                                 replay_dir_path)
                    extract_imgs(existing_mon1_stacks[0][RESULT],
                                 replay_dir_path)
                    extract_imgs(existing_mon2_stacks[0][RESULT],
                                 replay_dir_path)
                    new_tf.add(replay_dir_path,
                               'replay',
                               filter=set_tar_permissions)
                    new_tf.add(readme_path,
                               readme_file_name,
                               filter=set_tar_permissions)
                    new_tf.close()

                    # move new tar file to results directory
                    archive_path = os.path.join(RESULTS_PATH, new_tf_name)
                    shutil.move(new_tf_path, archive_path)

                    # insert into database
                    url = 'http://%s/results/%s/%s' % (
                        HOSTNAME, PORT, os.path.basename(archive_path))
                    json_response[FILENAME] = new_tf_name
                    json_response[RESULT] = archive_path
                    json_response[URL] = url
                    json_response[UUID] = uuid
                    json_response[HAM_NAME] = ham_stack_name
                    json_response[MON1_NAME] = mon1_stack_name
                    json_response[MON2_NAME] = mon2_stack_name
                    json_response[NAME] = replay_stack_name
                    json_response[STACK_TYPE] = REPLAY
                    json_response[DESCRIPTION] = short_desc
                    cls._DB_CONNECTOR.insert(IMAGES_COLLECTION,
                                             [json_response])
                except:
                    APP_LOGGER.exception(traceback.format_exc())
                    http_status_code = 500
                    json_response[ERROR] = str(sys.exc_info()[1])
                finally:
                    silently_remove_tree(tmp_path)

        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 500
            json_response[ERROR] = str(sys.exc_info()[1])
        finally:
            if ID in json_response:
                del json_response[ID]

        return make_clean_response(json_response, http_status_code)
Example #25
    def process_callback(future):
        try:
            _ = future.result()

            dirname = os.path.dirname(outfile_path)
            vcf_fn = os.path.basename(outfile_path)
            basename = os.path.splitext(vcf_fn)[0]
            pdf_fn = '%s.%s' % (basename, PDF)
            scatter_png_fn = '%s_scatter.%s' % (basename, PNG)
            scatter_ind_pdf_fn = '%s_scatter_ind.%s' % (basename, PDF)
            kde_png_fn = '%s_kde.%s' % (basename, PNG)
            kde_ind_pdf_fn = '%s_kde_ind.%s' % (basename, PDF)

            generate_plots(exp_def_name,
                           ac_result_path,
                           os.path.splitext(outfile_path)[0],
                           ignored_dyes=ignored_dyes,
                           data_set_name=cur_job_name)

            update = {
                "$set": {
                    STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                    RESULT: outfile_path,
                    URL: get_results_url(os.path.join(dirname, vcf_fn)),
                    PDF: os.path.join(dirname, pdf_fn),
                    PDF_URL: get_results_url(os.path.join(dirname, pdf_fn)),
                    PNG: os.path.join(dirname, scatter_ind_pdf_fn),
                    PNG_URL: get_results_url(os.path.join(dirname,
                                                          scatter_ind_pdf_fn)),
                    PNG_SUM: os.path.join(dirname, scatter_png_fn),
                    PNG_SUM_URL: get_results_url(os.path.join(dirname,
                                                              scatter_png_fn)),
                    KDE_PNG: os.path.join(dirname, kde_ind_pdf_fn),
                    KDE_PNG_URL: get_results_url(os.path.join(dirname,
                                                              kde_ind_pdf_fn)),
                    KDE_PNG_SUM: os.path.join(dirname, kde_png_fn),
                    KDE_PNG_SUM_URL: get_results_url(os.path.join(dirname,
                                                                  kde_png_fn)),
                    FINISH_DATESTAMP: datetime.today(),
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_GENOTYPER_COLLECTION, query, {})) > 0:
                db_connector.update(SA_GENOTYPER_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
                silently_remove_file(os.path.join(dirname, pdf_fn))
                silently_remove_file(os.path.join(dirname, scatter_png_fn))
                silently_remove_file(os.path.join(dirname, scatter_ind_pdf_fn))
                silently_remove_file(os.path.join(dirname, kde_png_fn))
                silently_remove_file(os.path.join(dirname, kde_ind_pdf_fn))
        except:
            APP_LOGGER.exception(
                "Error in Genotyper post request process callback.")
            error_msg = str(sys.exc_info()[1])
            update = {
                "$set": {
                    STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                    RESULT: None,
                    PDF: None,
                    PNG: None,
                    PNG_SUM: None,
                    FINISH_DATESTAMP: datetime.today(),
                    ERROR: error_msg
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_GENOTYPER_COLLECTION, query, {})) > 0:
                db_connector.update(SA_GENOTYPER_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
                silently_remove_file(os.path.join(dirname, pdf_fn))
                silently_remove_file(os.path.join(dirname, scatter_png_fn))
                silently_remove_file(os.path.join(dirname, scatter_ind_pdf_fn))
                silently_remove_file(os.path.join(dirname, kde_png_fn))
                silently_remove_file(os.path.join(dirname, kde_ind_pdf_fn))
Example #26
    def process_request(cls, params_dict):
        archive_names  = params_dict[cls.archives_param]
        job_name       = params_dict[cls.job_name_param][0]

        json_response = {CONVERT_IMAGES: []}

        # Ensure archive directory is valid
        try:
            archives = list()
            for archive_name in archive_names:
                archives.extend(get_archive_dirs(archive_name,
                                                 extensions=["bin"]))
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            return make_clean_response(json_response, 500)

        # Ensure at least one valid archive is found
        if len(archives) < 1:
            return make_clean_response(json_response, 404)

        # Process each archive
        status_codes  = []
        for i, archive in enumerate(archives):
            if len(archives) == 1:
                cur_job_name = job_name
            else:
                cur_job_name = "%s-%d" % (job_name, i)

            response = {
                        ARCHIVE: archive,
                        UUID: str(uuid4()),
                        STATUS: JOB_STATUS.submitted,       # @UndefinedVariable
                        JOB_NAME: cur_job_name,
                        JOB_TYPE_NAME: JOB_TYPE.pa_convert_images, # @UndefinedVariable
                        SUBMIT_DATESTAMP: datetime.today(),
                       }
            status_code = 200

            if cur_job_name in cls._DB_CONNECTOR.distinct(PA_CONVERT_IMAGES_COLLECTION,
                                                          JOB_NAME):
                status_code = 403
            else:
                try:
                    results_folder = get_results_folder()
                    outfile_path = os.path.join(results_folder,
                                                response[UUID] + ".tar.gz")

                    # Create helper functions
                    abs_callable = PaConvertImagesCallable(archive,
                                                           outfile_path,
                                                           response[UUID],
                                                           cls._DB_CONNECTOR)
                    callback = make_process_callback(response[UUID],
                                                     outfile_path,
                                                     cls._DB_CONNECTOR)

                    # Add to queue and update DB
                    cls._DB_CONNECTOR.insert(PA_CONVERT_IMAGES_COLLECTION,
                                             [response])
                    cls._EXECUTION_MANAGER.add_job(response[UUID],
                                                   abs_callable, callback)
                    del response[ID]
                except:
                    APP_LOGGER.exception(traceback.format_exc())
                    response[ERROR]  = str(sys.exc_info()[1])
                    status_code = 500

            json_response[CONVERT_IMAGES].append(response)
            status_codes.append(status_code)

        # If all jobs submitted successfully, then 200 should be returned.
        # Otherwise, the maximum status code seems good enough.
        return make_clean_response(json_response, max(status_codes))
Example #27
    def process_request(cls, params_dict):
        filenames = params_dict[cls.filenames_parameter]
        report_uuid = params_dict[cls.report_uuid_parameter][0]

        http_status_code = 200
        json_response = {RUN_REPORT_UUID: report_uuid, FILENAMES: filenames}

        filepaths = [
            os.path.join(MODIFIED_ARCHIVES_PATH, secure_filename(fn))
            for fn in filenames
        ]
        if not filenames or not report_uuid or not all(
                allowed_file(fp) for fp in filepaths):
            http_status_code = 400
        elif any(
                cls._DB_CONNECTOR.find_one(HDF5_COLLECTION, HDF5_PATH,
                                           {'$regex': fn + '$'}) is not None
                for fn in filenames):
            http_status_code = 403
        else:
            try:
                fp_to_datasets, duplicate = get_datasets_from_files(filepaths)
                if not fp_to_datasets or duplicate:
                    http_status_code = 403
                else:
                    new_hdf5_records = [
                        {HDF5_PATH: fp, HDF5_DATASET: dsname, "upload": True}
                        for fp in fp_to_datasets
                        for dsname in fp_to_datasets[fp]
                    ]
                    cls._DB_CONNECTOR.insert(HDF5_COLLECTION, new_hdf5_records)
                    APP_LOGGER.info('Updated database with %d new HDF5 files' %
                                    len(new_hdf5_records))

                    run_report = cls._DB_CONNECTOR.find_one(
                        RUN_REPORT_COLLECTION, UUID, report_uuid)
                    if run_report:
                        exist_datasets = set([
                            d for d in run_report[IMAGE_STACKS]
                            if isinstance(d, (str, unicode))
                        ])
                        new_datasets = set()
                        for datasets in fp_to_datasets.values():
                            new_datasets = new_datasets | datasets
                        new_datasets = list(new_datasets - exist_datasets)
                        if new_datasets:
                            cls._DB_CONNECTOR.update(
                                RUN_REPORT_COLLECTION, {UUID: report_uuid}, {
                                    '$addToSet': {
                                        IMAGE_STACKS: {
                                            '$each': [{
                                                'name': d,
                                                'upload': True
                                            } for d in new_datasets]
                                        }
                                    }
                                })
                            APP_LOGGER.info(
                                "Updated run report uuid=%s with %d HDF5 datasets."
                                % (report_uuid, len(new_datasets)))

                        del run_report[ID]
                        json_response.update({
                            "run_report": run_report,
                            "uploaded": new_datasets
                        })
                    else:
                        json_response.update({
                            "error":
                            "Run report uuid=%s does not exist." % report_uuid
                        })
            except:
                APP_LOGGER.exception(traceback.format_exc())
                json_response[ERROR] = str(sys.exc_info()[1])
                http_status_code = 500

        return make_clean_response(json_response, http_status_code)
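The core of the handler above is the record-flattening step: get_datasets_from_files returns a mapping of file path to a set of dataset names, and each (path, dataset) pair becomes one DB record. A short sketch of that flattening with made-up paths and lower-case stand-ins for the HDF5_PATH/HDF5_DATASET constants:

# Mapping of file path -> set of dataset names (sample data, made up).
fp_to_datasets = {
    "/archives/run1.h5": {"ds_A", "ds_B"},
    "/archives/run2.h5": {"ds_C"},
}

# One record per (file, dataset) pair, as inserted into HDF5_COLLECTION above.
new_hdf5_records = [
    {"hdf5_path": fp, "hdf5_dataset": dsname, "upload": True}
    for fp, dsnames in fp_to_datasets.items()
    for dsname in dsnames
]
assert len(new_hdf5_records) == 3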
Example #28
    def process_request(cls, params_dict):
        job_uuids       = params_dict[cls.job_uuid_param]
        job_name        = params_dict[cls.job_name_param][0]

        pico1_dye = None
        if cls.pico1_dye_param in params_dict:
            pico1_dye = params_dict[cls.pico1_dye_param][0]
        use_pico1_filter = pico1_dye is not None

        pico2_dye = None
        if cls.pico2_dye_param in params_dict:
            pico2_dye = params_dict[cls.pico2_dye_param][0]
        use_pico2_filter = pico2_dye is not None

        assay_dye = None
        if cls.assay_dye_param in params_dict:
            assay_dye       = params_dict[cls.assay_dye_param][0]
        num_probes      = params_dict[cls.n_probes_param][0]
        training_factor = params_dict[cls.training_param][0]
        dye_levels      = params_dict[cls.dye_levels_param]

        filtered_dyes = list()
        if cls.filtered_dyes_param in params_dict:
            filtered_dyes = params_dict[cls.filtered_dyes_param]

        ignored_dyes = list()
        if cls.ignored_dyes_param in params_dict:
            ignored_dyes = params_dict[cls.ignored_dyes_param]

        ui_threshold    = params_dict[cls.ui_threshold_param][0]

        if cls.dev_mode_param in params_dict and \
           params_dict[cls.dev_mode_param][0]:
            dev_mode = params_dict[cls.dev_mode_param][0]
        else:
            dev_mode = DEFAULT_DEV_MODE

        if cls.drift_compensate_param in params_dict and \
           params_dict[cls.drift_compensate_param][0]:
            drift_compensate = params_dict[cls.drift_compensate_param][0]
        else:
            drift_compensate = DEFAULT_DRIFT_COMPENSATE

        if cls.continuous_phase_param in params_dict and \
           params_dict[cls.continuous_phase_param][0]:
            use_pico_thresh = True
        else:
            use_pico_thresh = False

        if cls.ignore_lowest_barcode in params_dict and \
           params_dict[cls.ignore_lowest_barcode][0]:
            ignore_lowest_barcode = params_dict[cls.ignore_lowest_barcode][0]
        else:
            ignore_lowest_barcode = DEFAULT_IGNORE_LOWEST_BARCODE

        max_uninj_ratio = params_dict[cls.max_ui_ratio_param][0]

        json_response = {IDENTITY: []}

        # Ensure analysis job exists
        try:
            criteria        = {UUID: {"$in": job_uuids}}
            projection      = {ID: 0, RESULT: 1, UUID: 1, CONFIG: 1}
            pa_process_jobs = cls._DB_CONNECTOR.find(PA_PROCESS_COLLECTION,
                                                     criteria, projection)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            return make_clean_response(json_response, 500)

        # Ensure at least one valid analysis job exists
        if len(pa_process_jobs) < 1:
            return make_clean_response(json_response, 404)

        # Process each archive
        status_codes  = []
        for i, pa_uuid in enumerate(job_uuids):
            if len(pa_process_jobs) == 1:
                cur_job_name = job_name
            else:
                cur_job_name = "%s-%d" % (job_name, i)

            status_code = 200

            if cur_job_name in cls._DB_CONNECTOR.distinct(SA_IDENTITY_COLLECTION,
                                                          JOB_NAME):
                status_code = 403
                json_response[IDENTITY].append({ERROR: 'Job exists.'})
            else:
                try:
                    # Create helper functions
                    sai_callable = SaIdentityCallable(pa_uuid,
                                                      num_probes,
                                                      training_factor,
                                                      assay_dye,
                                                      use_pico1_filter,
                                                      use_pico2_filter,
                                                      pico1_dye,
                                                      pico2_dye,
                                                      dye_levels,
                                                      ignored_dyes,
                                                      filtered_dyes,
                                                      ui_threshold,
                                                      max_uninj_ratio,
                                                      cls._DB_CONNECTOR,
                                                      job_name,
                                                      use_pico_thresh,
                                                      ignore_lowest_barcode,
                                                      dev_mode,
                                                      drift_compensate)
                    response = copy.deepcopy(sai_callable.document)
                    callback = make_process_callback(sai_callable.uuid,
                                                     sai_callable.outfile_path,
                                                     sai_callable.plot_path,
                                                     sai_callable.report_path,
                                                     sai_callable.plate_plot_path,
                                                     sai_callable.temporal_plot_path,
                                                     sai_callable.drop_count_plot_path,
                                                     cls._DB_CONNECTOR)

                    # Add to queue
                    cls._EXECUTION_MANAGER.add_job(sai_callable.uuid,
                                                   sai_callable,
                                                   callback)

                except:
                    APP_LOGGER.exception(traceback.format_exc())
                    response = {JOB_NAME: cur_job_name, ERROR: str(sys.exc_info()[1])}
                    status_code = 500
                finally:
                    if ID in response:
                        del response[ID]
                    json_response[IDENTITY].append(response)

            status_codes.append(status_code)

        # If all jobs submitted successfully, then 200 should be returned.
        # Otherwise, the maximum status code seems good enough.
        return make_clean_response(json_response, max(status_codes))
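Example #28 repeats the same "first value if present and truthy, else default" dance for half a dozen optional parameters. A small helper collapses that pattern; params_dict maps parameter names to lists of values, as in the handlers above, and the helper itself is an illustration rather than part of the codebase:

def first_or_default(params_dict, key, default=None):
    """Return the first truthy value for key, or default when absent/falsy."""
    values = params_dict.get(key)
    return values[0] if values and values[0] else default

# Usage with hypothetical parameter names:
params = {"dev_mode": [True], "pico1_dye": []}
dev_mode = first_or_default(params, "dev_mode", False)  # -> True
pico1_dye = first_or_default(params, "pico1_dye")       # -> None
use_pico1_filter = pico1_dye is not None                # -> False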
Example #29
    def __call__(self):
        update = {
            '$set': {
                STATUS: JOB_STATUS.running,  # @UndefinedVariable
                START_DATESTAMP: datetime.today()
            }
        }
        query = {UUID: self.uuid}
        self.db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)

        def gen_dye_scatterplot(dyes, sys_listener_path):
            try:
                analysis_df = pandas.read_table(self.analysis_file,
                                                sep=sniff_delimiter(
                                                    self.analysis_file))
                ac_df = pandas.read_table(self.tmp_outfile_path,
                                          sep=sniff_delimiter(
                                              self.tmp_outfile_path))
                analysis_df['assay'] = False
                analysis_df.loc[analysis_df['identity'].notnull(),
                                'assay'] = ac_df['assay'].values

                # System listener inputs
                dyn_align_offsets = {}
                temps = {}
                steps = {}
                if sys_listener_path is not None:
                    sys_listener_dir = os.path.dirname(sys_listener_path)
                    clamp_temp_tp = ClampTempTopicParser()
                    old_channel_offset_tp = OldChannelOffsetTopicParser()
                    channel_offset_tp = ChannelOffsetTopicParser()
                    dyn_align_steps_tp = DynamicAlignStepsParser()
                    topic_parsers = [
                        clamp_temp_tp, old_channel_offset_tp,
                        channel_offset_tp, dyn_align_steps_tp
                    ]
                    sys_listener_parser = SystemListenerParser(
                        sys_listener_dir, topic_parsers=topic_parsers)
                    temps = sys_listener_parser.get_topic_results(
                        clamp_temp_tp.topic)
                    dyn_align_offsets = sys_listener_parser.get_topic_results(
                        channel_offset_tp.topic)
                    if len(dyn_align_offsets) < 1:
                        APP_LOGGER.info("Using old channel offset parser...")
                        dyn_align_offsets = sys_listener_parser.get_topic_results(
                            old_channel_offset_tp.topic)
                    else:
                        APP_LOGGER.info("Using new channel offset parser...")
                    steps = sys_listener_parser.get_topic_results(
                        dyn_align_steps_tp.topic)

                generate_dye_scatterplots(analysis_df,
                                          dyes,
                                          self.tmp_dyes_plot_path,
                                          self.job_name,
                                          self.pico1_dye,
                                          dyn_align_offsets=dyn_align_offsets,
                                          temps=temps,
                                          steps=steps)
                shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
                APP_LOGGER.info("Dyes scatter plot generated for %s." %
                                self.job_name)
            except:
                APP_LOGGER.exception("Dyes scatter plot generation failed.")

        try:
            safe_make_dirs(self.tmp_path)

            exp_def_fetcher = ExpDefHandler()
            experiment = exp_def_fetcher.get_experiment_definition(
                self.exp_def_name)

            model_file_dict = available_models(self.ac_method)
            if self.ac_model is None:
                classifier_file = None
            else:
                if self.ac_model in model_file_dict:
                    classifier_file = model_file_dict[self.ac_model]
                else:
                    msg = ("Assay caller model %s is unavailable "
                           "for method %s." % (self.ac_model, self.ac_method))
                    APP_LOGGER.error(msg)
                    raise Exception(msg)

            AssayCallManager(self.num_probes,
                             in_file=self.analysis_file,
                             out_file=self.tmp_outfile_path,
                             scatter_plot_file=self.tmp_scatter_plot_path,
                             training_factor=self.training_factor,
                             assay=self.assay_dye,
                             fiducial=self.pico2_dye,
                             controls=experiment.negative_controls.barcodes,
                             ctrl_thresh=self.ctrl_thresh,
                             n_jobs=8,
                             controls_filtering=self.ctrl_filter,
                             assay_type=self.ac_method,
                             classifier_file=classifier_file)

            if not os.path.isfile(self.tmp_outfile_path):
                raise Exception('Secondary analysis assay caller job ' +
                                'failed: output file not generated.')

            shutil.copy(self.tmp_outfile_path, self.outfile_path)
            gen_dye_scatterplot(experiment.dyes, self.get_sys_listener_path())

            if os.path.isfile(self.tmp_scatter_plot_path):
                shutil.copy(self.tmp_scatter_plot_path, self.scatter_plot_path)
        finally:
            # Regardless of success or failure, remove the copied archive directory
            shutil.rmtree(self.tmp_path, ignore_errors=True)
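The try/finally shape of __call__ above (create a scratch area, copy finished outputs to their final paths, then remove the scratch area no matter what) is a natural fit for a context manager. A sketch under that assumption, using only the standard library; it is not the codebase's safe_make_dirs helper:

import contextlib
import os
import shutil
import tempfile

@contextlib.contextmanager
def scratch_dir():
    """Yield a scratch directory that is always removed on exit."""
    path = tempfile.mkdtemp()
    try:
        yield path
    finally:
        # Mirrors the handler's cleanup: best-effort removal on success or error.
        shutil.rmtree(path, ignore_errors=True)

# Usage: finished outputs are copied out inside the block; the scratch
# area disappears even if an exception is raised mid-run.
with scratch_dir() as tmp:
    with open(os.path.join(tmp, "out.txt"), "w") as fh:
        fh.write("result")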
Example #30
    def process_request(cls, params_dict):
        job_uuids = params_dict[cls.job_uuid_param]
        job_name = params_dict[cls.job_name_param][0]
        exp_def_name = params_dict[cls.exp_defs_param][0]
        training_factor = params_dict[cls.training_param][0]
        ctrl_thresh = params_dict[cls.ctrl_thresh][0]
        ctrl_filter = params_dict[cls.ctrl_filter][0]
        ac_method = params_dict[cls.ac_method][0]

        ac_model = None
        if cls.ac_model in params_dict and params_dict[cls.ac_model][0]:
            ac_model = params_dict[cls.ac_model][0]

        json_response = {ASSAY_CALLER: []}

        # Ensure analysis job exists
        try:
            criteria = {UUID: {'$in': job_uuids}}
            projection = {ID: 0, RESULT: 1, UUID: 1}
            sa_identity_jobs = cls._DB_CONNECTOR.find(SA_IDENTITY_COLLECTION,
                                                      criteria, projection)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            return make_clean_response(json_response, 500)

        # Process each archive
        status_codes = []
        for i, sa_identity_job in enumerate(sa_identity_jobs):
            if len(sa_identity_jobs) == 1:
                cur_job_name = job_name
            else:
                cur_job_name = '%s-%d' % (job_name, i)

            status_code = 200

            if cur_job_name in cls._DB_CONNECTOR.distinct(
                    SA_ASSAY_CALLER_COLLECTION, JOB_NAME):
                status_code = 403
                json_response[ASSAY_CALLER].append({ERROR: 'Job exists.'})
            else:
                try:
                    if not os.path.isfile(sa_identity_job[RESULT]):
                        raise InvalidFileError(sa_identity_job[RESULT])

                    # Create helper functions
                    sac_callable = SaAssayCallerCallable(
                        sa_identity_job[UUID], exp_def_name, training_factor,
                        ctrl_thresh, cls._DB_CONNECTOR, cur_job_name,
                        ctrl_filter, ac_method, ac_model)
                    response = copy.deepcopy(sac_callable.document)
                    callback = make_process_callback(
                        sac_callable.uuid, sac_callable.outfile_path,
                        sac_callable.scatter_plot_path,
                        sac_callable.dyes_plot_path, cls._DB_CONNECTOR)
                    # Add to queue
                    cls._EXECUTION_MANAGER.add_job(response[UUID],
                                                   sac_callable, callback)

                except:
                    APP_LOGGER.exception(traceback.format_exc())
                    response = {
                        JOB_NAME: cur_job_name,
                        ERROR: str(sys.exc_info()[1])
                    }
                    status_code = 500
                finally:
                    if ID in response:
                        del response[ID]
                    json_response[ASSAY_CALLER].append(response)

            status_codes.append(status_code)

        # If all jobs submitted successfully, then 200 should be returned.
        # Otherwise, the maximum status code seems good enough.
        return make_clean_response(json_response, max(status_codes))
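Examples #28 and #30 both guard against duplicate job names by querying distinct(collection, JOB_NAME) on every loop iteration; fetching the name set once before the loop would avoid the repeated round trips. A sketch of that hoisted check with an in-memory set standing in for the connector (names and data are hypothetical):

# One distinct() query, hoisted out of the submission loop.
existing_names = set(["job-0", "job-1"])

def reserve_name(name, taken):
    """Reject duplicates up front (the handlers answer 403), else claim the name."""
    if name in taken:
        return 403
    taken.add(name)
    return 200

assert reserve_name("job-1", existing_names) == 403  # duplicate -> rejected
assert reserve_name("job-2", existing_names) == 200  # new name -> accepted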