Example #1
    def testDatasetLevelParam(self):
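        """A dataset-level parameter ("max_missing_values") must be kept and
        merged with the automatically inserted default parameters."""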
        request_obj = copy.deepcopy(self.request_obj)
        request_obj["datasets"][0]["parameters"] = [{
            "name": "max_missing_values",
            "value": "-1"
        }]

        input_object = input_deserializer.create_analysis_input_object(
            request_obj)

        self.assertIsNotNone(input_object)

        # make sure the dataset-level parameters are there
        self.assertIsNotNone(input_object.datasets[0].parameter_dict)
        self.assertEqual(3, len(input_object.datasets[0].parameter_dict))

        for param_name in [
                "max_missing_values", "discrete_norm_function",
                "continuous_norm_function"
        ]:
            self.assertTrue(
                param_name in input_object.datasets[0].parameter_dict)

        self.assertEqual(
            "-1",
            input_object.datasets[0].parameter_dict["max_missing_values"])
Example #2
    def test_ssgsea(self):
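        """Run the GSVA-based ssGSEA analyser on the test request and check
        the structure and values of the returned per-sample pathway table."""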
        json_obj = json.loads(self.test_json)
        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(gene_set, identifier_mapping=mappings,
                                                         identifiers=request.datasets[0].df[:][
                                                             gene_id_colname].tolist())

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(request=request,
                                          gene_set_mappings={request.datasets[0].name: gene_set_mapping},
                                          identifier_mappings=mappings,
                                          gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)
        self.assertIsNotNone(result[0].fold_changes)

        # test the actual result
        reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        required_fields = ["Pathway", "Name", "Sample.1", "Sample.2", "Sample.3"]
        for required_field in required_fields:
            self.assertTrue(required_field in reader.fieldnames)

        # test the pathways
        found_pathways = 0
        found_p1 = False
        found_p2 = False

        for pathway in reader:
            found_pathways += 1

            if pathway["Pathway"] == "R-HSA-1280218":
                found_p1 = True
                self.assertEqual("0.0", pathway["Sample.1"].strip())
                self.assertEqual("0.02880908", pathway["Sample.2"].strip())
                self.assertEqual("0.02880908", pathway["Sample.3"].strip())

            if pathway["Pathway"] == "R-HSA-392499":
                found_p2 = True
                self.assertEqual(-0.5, float(pathway["Sample.1"]))
                self.assertEqual(-0.5, float(pathway["Sample.2"]))
                self.assertEqual(-0.5, float(pathway["Sample.3"]))

        self.assertEqual(143, found_pathways)

        self.assertTrue(found_p1)
        self.assertTrue(found_p2)
Example #3
    def test_no_design_filtering(self):
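        """Datasets are filtered even when no experimental design is passed:
        with max_missing_values=0.5 one of the three genes is removed."""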
        test_json = """
                        {
                  "analysisId": "test_01",
                  "datasets": [
                    {
                      "data": "\\tSample 1\\tSample2\\tSample 3\\nCD19\\t10\\t20\\t2\\nMS4A1\\t10\\t20\\t2\\n\
                      MITF\\t10\\t0\\t0\\n",
                      "design": {
                        "analysisGroup": [
                          "Treatment",
                          "Control",
                          "Treatment"
                        ],
                        "comparison": {
                          "group1": "Control",
                          "group2": "Treatment"
                        },
                        "samples": [
                          "Sample 1",
                          "Sample 2",
                          "Sample 3"
                        ],
                        "patient": [
                          "Patient 1",
                          "Patient 2",
                          "Patient 3"
                       ]
                      },
                      "name": "First experiment",
                      "type": "rnaseq_counts"
                    }
                  ],
                  "methodName": "ssgsea"
                }
                """

        worker = reactome_analysis_worker.ReactomeAnalysisWorker()

        json_obj = json.loads(test_json)
        request_obj = create_analysis_input_object(json_obj)
        worker._convert_datasets(request_obj)
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        self.assertEqual(3, len(request_obj.datasets[0].df))

        filtered_df = reactome_analysis_worker.ReactomeAnalysisWorker._filter_dataset(request_obj.datasets[0].df,
                                                                                      mappings,
                                                                                      None,
                                                                                      0.5)

        self.assertIsNotNone(filtered_df)
        self.assertEqual(2, len(filtered_df))
Example #4
    def testNoDatasetParams(self):
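        """Without explicit dataset-level parameters, the default parameters
        must still be inserted automatically."""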
        input_object = input_deserializer.create_analysis_input_object(
            self.request_obj)

        self.assertIsNotNone(input_object)

        # make sure the dataset-level parameters are there
        self.assertIsNotNone(input_object.datasets[0].parameter_dict)
        self.assertEqual(3, len(input_object.datasets[0].parameter_dict))

        for param_name in [
                "max_missing_values", "discrete_norm_function",
                "continuous_norm_function"
        ]:
            self.assertTrue(
                param_name in input_object.datasets[0].parameter_dict)
Example #5
    def test_analysis(self):
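        """End-to-end run of the R-based analyser: deserialize the request,
        filter the dataset, map the gene set, and check the result table."""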
        json_obj = json.loads(self.test_json)
        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        # get the mappings
        mappings = util.map_identifiers({
            "MITF", "CD19", "MS4A1", "SDC1", "CD38", "EGFR", "IL10", "IL6",
            "GRB2", "GAB1", "SHC1"
        })

        # filter the dataset
        request.datasets[0].df = ReactomeAnalysisWorker._filter_dataset(
            request.datasets[0].df, mappings, request.datasets[0].design, 1)

        gene_set = self._get_gene_set()
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=request.datasets[0].df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={request.datasets[0].name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)

        result_lines = result[0].pathways.split("\n")
        self.assertEqual(233, len(result_lines))

        reader = csv.DictReader(result_lines, delimiter="\t")
        required_fields = ("Pathway", "Name", "Direction", "FDR", "PValue",
                           "NGenes")
        for field in required_fields:
            self.assertTrue(field in reader.fieldnames,
                            "Missing field " + field)
Example #6
    def test_pathway_string(self):
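        """The "pathways" parameter must restrict the result to the two
        requested pathways."""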
        json_obj = json.loads(self.test_json)

        # add the parameters
        json_obj["parameters"] = [{"name": "pathways", "value": "R-HSA-1280218,R-HSA-392499"},
                                  {"name": "create_reactome_visualization", "value": "False"}]

        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(gene_set, identifier_mapping=mappings,
                                                         identifiers=request.datasets[0].df[:][
                                                             gene_id_colname].tolist())

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(request=request,
                                          gene_set_mappings={request.datasets[0].name: gene_set_mapping},
                                          identifier_mappings=mappings,
                                          gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)
        self.assertIsNotNone(result[0].fold_changes)

        # test the actual result
        reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        # there should only be two entries
        n_entries = 0

        for line in reader:
            n_entries += 1

        self.assertEqual(2, n_entries)
Example #7
    def test_heartbeat(self):
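        """The analyser must invoke the registered heartbeat callback while
        the analysis is running."""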
        json_obj = json.loads(self.test_json)
        json_obj["parameters"].append({
            "name": "max_missing_values",
            "value": "1"
        })

        # remove the patient since this coefficient cannot be estimated
        json_obj["datasets"][0]["design"].pop("patient")

        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        # filter the dataset
        request.datasets[0].df = ReactomeAnalysisWorker._filter_dataset(
            request.datasets[0].df, mappings, request.datasets[0].design, 1)

        gene_set = self._get_gene_set()
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=request.datasets[0].df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        analyser.set_heartbeat_callback(self.update_heartbeat)
        start_time = int(time.time()) - 1

        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={request.datasets[0].name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # make sure the heartbeat was updated
        self.assertGreater(self.last_heartbeat, start_time)
Example #8
    def test_parameter_passing(self):
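        """Request-level parameters (here "max_missing_values") must be merged
        with the default parameters and passed through to the analysis."""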
        json_obj = json.loads(self.test_json)
        json_obj["parameters"].append({
            "name": "max_missing_values",
            "value": "1"
        })

        # remove the patient since this coefficient cannot be estimated
        json_obj["datasets"][0]["design"].pop("patient")

        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        self.assertEqual(3, len(request.parameters))
        # default values inserted automatically
        self.assertEqual(6, len(request.parameter_dict))
        self.assertTrue("max_missing_values" in request.parameter_dict)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        # filter the dataset
        request.datasets[0].df = ReactomeAnalysisWorker._filter_dataset(
            request.datasets[0].df, mappings, request.datasets[0].design, 1)

        gene_set = self._get_gene_set()
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=request.datasets[0].df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={request.datasets[0].name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)

        result_lines = result[0].pathways.split("\n")
        self.assertEqual(24, len(result_lines))

        reader = csv.DictReader(result_lines, delimiter="\t")
        required_fields = ("Pathway", "Name", "Direction", "FDR", "PValue",
                           "NGenes")
        for field in required_fields:
            self.assertTrue(field in reader.fieldnames,
                            "Missing field " + field)

        # pathways expected to be reported as down-regulated
        pathways_down = ("R-HSA-392499", "R-HSA-597592", "R-HSA-2990846",
                         "R-HSA-3108232", "R-HSA-3232118")
        for row in reader:
            if reader.line_num == 2:
                self.assertTrue(row["Pathway"] == "R-HSA-392499")
            if reader.line_num == 6:
                self.assertTrue(row["Pathway"] == "R-HSA-3232118")
            if reader.line_num == 15:
                self.assertTrue(row["Pathway"] == "R-HSA-162582")
            if reader.line_num == 24:
                self.assertTrue(row["Pathway"] == "R-HSA-6811558")

            if row["Pathway"] in pathways_up:
                self.assertTrue(row["Direction"] == "Down")
                self.assertTrue(
                    float(row["av_foldchange"]) < 0,
                    "Incorrect regulation for " + row["Pathway"])
            else:
                self.assertTrue(row["Direction"] == "Up")
                self.assertTrue(float(row["av_foldchange"]) > 0)

        # test the FC result
        self.assertIsNotNone(result[0].fold_changes)
        fc_lines = result[0].fold_changes.split("\n")
        self.assertEqual(4, len(fc_lines))

        fc_reader = csv.DictReader(fc_lines, delimiter="\t")
        fc_fields = ("logFC", "Identifier")

        for field in fc_fields:
            self.assertTrue(field in fc_reader.fieldnames,
                            "Missing FC field " + field)

        mitf_found = False

        for row in fc_reader:
            if row["Identifier"] == "MITF":
                self.assertAlmostEqual(4.53, float(row["logFC"]), delta=0.01)
                mitf_found = True

        self.assertTrue(mitf_found, "Failed to find MITF in FC data")
Example #9
def start_analysis(body):  # noqa: E501
    """Performs the specified gene set analysis

     # noqa: E501

    :param body: Specification of analysis to perform
    :type body: dict | bytes

    :rtype: str
    """
    # get the JSON-encoded dict from the request object
    if connexion.request.is_json:
        analysis_dict = connexion.request.get_json(cache=False)
    # de-compress if it's a gzipped string
    elif connexion.request.content_type == "application/gzip":
        LOGGER.debug("Received gzipped analysis request. Decompressing...")

        decompressed_string = zlib.decompress(connexion.request.data)
        analysis_dict = json.loads(decompressed_string)

        # free the memory again
        del decompressed_string
    else:
        LOGGER.debug(
            "Invalid analysis request submitted. Request body does not describe a JSON object."
        )
        abort(
            406,
            "Invalid analysis request submitted. Request body does not describe a JSON object."
        )
        return

    try:
        analysis_request = input_deserializer.create_analysis_input_object(
            analysis_dict)
    except Exception as e:
        LOGGER.debug("Unknown analysis method submitted: " +
                     analysis_dict["methodName"])
        abort(404, "Unknown analysis method selected.")

    # make sure all datasets have unique names
    all_names = [dataset.name for dataset in analysis_request.datasets]

    if len(all_names) != len(set(all_names)):
        LOGGER.debug("Analysis request contains duplicate names")
        abort(406, "Datasets must not have duplicate names")

    # make sure the analysis design is present
    for n_dataset in range(0, len(analysis_request.datasets)):
        if not analysis_request.datasets[n_dataset].design:
            LOGGER.debug("Analysis request misses design")
            abort(
                406,
                "Invalid request. Dataset '{name}' misses the required experimental design."
                .format(name=analysis_request.datasets[n_dataset].name))
        if not analysis_request.datasets[n_dataset].design.comparison:
            LOGGER.debug("Analysis request misses design comparison")
            abort(
                406,
                "Invalid request. Dataset '{name}' misses the required comparison specification."
                .format(name=analysis_request.datasets[n_dataset].name))

    # generate an analysis id
    analysis_id = str(uuid.uuid1())

    try:
        storage = ReactomeStorage()

        # a very basic sanity check to make sure it's unique
        while storage.analysis_exists(analysis_id):
            analysis_id = str(uuid.uuid1())

        # Load request data from storage
        for n_dataset in range(0, len(analysis_dict["datasets"])):
            data = analysis_dict["datasets"][n_dataset]["data"]

            # Update for external datasets
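            # heuristic: storage tokens start with "rqu_", and real inline
            # expression data is always longer than 20 characters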
            if data[0:4] == "rqu_" or len(data) < 20:
                # make sure the request data exists
                if not storage.request_token_exists(data):
                    MISSING_DATA_TOKEN_COUNTER.inc()
                    abort(
                        500, "No data available for storage token '{}'".format(
                            data))

                # load the data
                stored_data = storage.get_request_data(data)

                # update the request object
                analysis_dict["datasets"][n_dataset][
                    "data"] = stored_data.decode("UTF-8")

        # Set the initial status
        encoder = JSONEncoder()

        status = AnalysisStatus(id=analysis_id,
                                status="running",
                                completed=0,
                                description="Queued")
        storage.set_status(analysis_id, encoder.encode(status))

        # Save the request data
        analysis_dict["analysisId"] = analysis_id
        storage.set_analysis_request_data(token=analysis_id,
                                          data=encoder.encode(analysis_dict))

        try:
            # Submit the request to the queue
            queue = ReactomeMQ()
            queue.post_analysis(
                AnalysisRequest(request_id=analysis_id).to_json(),
                analysis_request.method_name)
            LOGGER.debug("Analysis " + analysis_id + " submitted to queue")
            queue.close()

            STARTED_ANALYSIS_COUNTER.inc()

            return analysis_id
        except socket.gaierror as e:
            # update the status
            LOGGER.error("Failed to connect to queuing system: " + str(e))
            status = AnalysisStatus(
                id=analysis_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(analysis_id, encoder.encode(status))

            abort(
                503,
                "Failed to connect to queuing system. Please try again in a few seconds."
            )
        except ReactomeMQException as e:
            LOGGER.error("Failed to post message to queuing system: " + str(e))
            # update the status
            status = AnalysisStatus(
                id=analysis_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(analysis_id, encoder.encode(status))

            abort(
                503,
                "The number of analysis requests is currently too high. Please try again in a few minutes."
            )
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
    except (socket.timeout, socket.gaierror) as e:
        LOGGER.error(
            "Socket timeout connecting to storage or queuing system: " +
            str(e))
        abort(
            503,
            "Failed to connect to downstream system. Please try again in a few minutes."
        )
Example #10
    def _on_new_analysis(self, ch, method, properties, body):
        """
        Callback function that is triggered whenever a new
        message with an analysis request is received.
        :param ch: The channel the message was received on
        :param method: Method details
        :param properties: Message properties
        :param body: The actual message body (= JSON encoded analysis request)
        """
        LOGGER.debug("Received message.")

        # increment the running analyses
        RUNNING_ANALYSES.inc()

        # create the analysis object
        try:
            mq_request = analysis_request.from_json(body)

            # load the data from storage
            if not self._get_storage().analysis_request_data_exists(
                    mq_request.request_id):
                raise Exception(
                    "Failed to receive request data from storage. Please resubmit your analysis request."
                )

            # load the JSON data from storage and decode it
            body_dict = json.loads(
                self._get_storage().get_analysis_request_data(
                    mq_request.request_id))
            request = create_analysis_input_object(body_dict)
        except Exception as e:
            # This means that the application has a major problem - this should never happen
            LOGGER.critical("Failed to create analysis request object: " +
                            str(e))
            LOGGER.debug("Error details:", exc_info=1)
            # remove the message from the queue
            self._acknowledge_message(ch, method)

            return

        LOGGER.debug("Received analysis request for " + request.method_name +
                     " (" + request.analysis_id + ")")

        # make sure the request contains datasets
        if len(request.datasets) < 1:
            LOGGER.debug(
                "Analysis request {} does not contain any datasets".format(
                    request.analysis_id))
            # update the status as failed
            self._set_status(
                request.analysis_id,
                "failed",
                description="Request did not contain any datasets",
                completed=1)
            self._acknowledge_message(ch, method)
            return

        # create the analyser to use
        reactome_analyser = ReactomeAnalyser.get_analyser_for_method(
            request.method_name.lower())

        # make sure the analyser exists
        if reactome_analyser is None:
            self._set_status(
                request.analysis_id,
                status="failed",
                description="Unsupported method '{}' selected".format(
                    request.method_name),
                completed=1)
            self._acknowledge_message(ch, method)
            return

        # update the status and mark it as received
        self._set_status(request.analysis_id,
                         status="running",
                         description="Converting datasets...",
                         completed=0.05)

        # convert the dataset matrices
        if not self._convert_datasets(request):
            self._acknowledge_message(ch, method)
            return

        # get the reactome server to use
        reactome_server = request.parameter_dict.get("reactome_server",
                                                     "www.reactome.org")
        LOGGER.info("Reactome server: {}".format(reactome_server))

        try:
            identifier_mappings = self._map_identifiers(
                request, reactome_server=reactome_server)
        except Exception as e:
            self._set_status(request.analysis_id,
                             status="failed",
                             description=str(e),
                             completed=1)
            self._acknowledge_message(ch, method)
            return

        # make sure the experimental design matches the number of samples
        if not self._validate_experimental_design(request.datasets,
                                                  request.analysis_id):
            self._acknowledge_message(ch, method)
            return

        # load the matching gene set
        use_interactors = request.parameter_dict.get("use_interactors",
                                                     "False").lower() == "true"
        include_disease = request.parameter_dict.get(
            "include_disease_pathways", "False").lower() == "true"

        # species is always set at human since we use Reactome's mapping feature "to human"
        gene_set = GeneSet.create_from_file(
            generate_pathway_filename(resource="reactome",
                                      species="H**o sapiens",
                                      contains_interactors=use_interactors,
                                      contains_disease=include_disease))

        # filter the datasets
        for dataset in request.datasets:
            dataset.df = self._filter_dataset(dataset.df,
                                              identifier_mappings,
                                              dataset.design,
                                              max_missing_values=float(
                                                  request.parameter_dict.get(
                                                      "max_missing_values",
                                                      0.5)))
            # make sure there are identifiers left
            if dataset.df.size < 1:
                LOGGER.debug("No identifiers left after filter")
                self._set_status(
                    request.analysis_id,
                    status="failed",
                    description=
                    "No identifiers left in dataset {name} after filtering. Please adjust "
                    "the max_missing_values parameter.".format(
                        name=dataset.name),
                    completed=1)
                self._acknowledge_message(ch, method)
                return

        # get the retained identifiers
        identifiers_after_filter = ReactomeAnalysisWorker._extract_identifiers(
            datasets=request.datasets)

        # perform the mapping for every dataset
        mappings = dict()
        for dataset in request.datasets:
            LOGGER.debug("Mapping identifiers for dataset {}".format(
                dataset.name))
            mappings[dataset.name] = GeneSetMapping.create_mapping(
                gene_set=gene_set,
                identifiers=dataset.df[:][dataset.df.dtype.names[0]].tolist(),
                identifier_mapping=identifier_mappings)

        # process the analysis
        self._set_status(
            request.analysis_id,
            status="running",
            description="Performing gene set analysis using {}".format(
                request.method_name),
            completed=0.2)

        try:
            # move the analysis to a separate process in order to "stay alive" in the eyes of
            # the queuing system - rpy2 causes python to stop
            is_analysis_complete = multiprocessing.Event()
            result_queue = multiprocessing.Queue()
            status_queue = multiprocessing.Queue()
            heartbeat_queue = multiprocessing.Queue()
            analysis_process = AnalysisProcess(
                analyser=reactome_analyser,
                request=request,
                gene_set_mappings=mappings,
                gene_set=gene_set,
                identifier_mappings=identifier_mappings,
                on_complete=is_analysis_complete,
                result_queue=result_queue,
                status_queue=status_queue,
                heartbeat_queue=heartbeat_queue)
            LOGGER.debug("Launching process to perform the analysis...")

            analysis_process.start()

            # keep track of the last log heartbeat to see if the process timed out
            last_heartbeat = int(time.time())

            # fetch the blueprint (in parallel) for the REACTOME result conversion
            reactome_blueprint = None

            try:
                # only get the blueprint if the option is set
                if request.parameter_dict.get(
                        "create_reactome_visualization").lower() == "true":
                    LOGGER.debug(
                        "Fetching blueprint for Reactome result conversion")
                    reactome_blueprint = result_converter.perform_reactome_gsa(
                        identifiers=identifiers_after_filter,
                        use_interactors=use_interactors,
                        reactome_server=reactome_server,
                        include_disease=include_disease)
            except Exception as e:
                LOGGER.warning("Failed to retrieve Reactome blueprint: " +
                               str(e))

            # wait for completion
            while (analysis_process.is_alive()
                   and not is_analysis_complete.is_set()):
                # test whether the analysis should be interrupted
                if self._get_mq().get_is_shutdown():
                    LOGGER.debug(
                        "Shutdown triggered, terminating analysis process")
                    analysis_process.terminate()
                    analysis_process.join(0.1)
                    return

                # update the last received heartbeat
                if heartbeat_queue.qsize() > 0:
                    try:
                        while heartbeat_queue.qsize() > 0:
                            last_heartbeat = heartbeat_queue.get(block=True,
                                                                 timeout=0.5)
                    except Exception:
                        # ignore any timeouts since these should not
                        # negatively affect the heartbeat anyway
                        pass

                # make sure the process sent a heartbeat in the required minimum time
                current_timeout = int(time.time()) - last_heartbeat
                if current_timeout > self.max_timeout:
                    LOGGER.error("Analysis timed out (" +
                                 str(current_timeout) + " seconds)")
                    # add a "nice" Exception to the gsa_result queue
                    result_queue.put(
                        Exception(
                            "Error: Analysis timed out. Please retry the analysis at a later time."
                        ))
                    break

                # receive and process any status updates
                if status_queue.qsize() > 0:
                    try:
                        # only use the last update
                        while status_queue.qsize() > 0:
                            status_object = status_queue.get(block=True,
                                                             timeout=0.5)
                        self._set_status(request.analysis_id,
                                         status="running",
                                         description=status_object.description,
                                         completed=status_object.completed)
                    except Exception:
                        # this can safely be ignored since it is most commonly caused by the fact that the worker
                        # is too busy and fetching of the message timed out
                        pass

                self._get_mq().sleep(1)

            LOGGER.debug("Analysis process completed. Joining process...")

            # for potential cleanup
            analysis_process.join(1)

            # retrieve the result from the queue
            try:
                gsa_results = result_queue.get(block=True, timeout=0.5)
            except queue.Empty:
                gsa_results = None

            LOGGER.debug("Result received from queue.")

            # make sure a result was received
            if not isinstance(gsa_results, list) or len(gsa_results) < 1:
                LOGGER.error("No analysis result retrieved for {}".format(
                    request.analysis_id))

                # test if an exception is returned instead
                if isinstance(gsa_results, Exception):
                    self._set_status(
                        request.analysis_id,
                        status="failed",
                        description="{} analysis failed: {}".format(
                            request.method_name, str(gsa_results)),
                        completed=1)
                else:
                    self._set_status(request.analysis_id,
                                     status="failed",
                                     description="{} analysis failed.".format(
                                         request.method_name),
                                     completed=1)

                self._acknowledge_message(ch, method)
                return

            # create the AnalysisResult object
            analysis_result = AnalysisResult(
                release=os.getenv("REACTOME_VERSION", "68"),
                results=gsa_results,
                mappings=self._convert_mapping_result(identifier_mappings))

            # submit the result to Reactome
            if reactome_blueprint:
                analysis_result.reactome_links = list()
                self._set_status(request.analysis_id,
                                 status="running",
                                 description="Creating REACTOME visualization",
                                 completed=0.9)

                for reactome_type in reactome_analyser.reactome_result_types:
                    LOGGER.debug("Submitting result for " + reactome_type.name)
                    try:
                        pathways_to_exclude = list()

                        if not include_disease:
                            LOGGER.debug(
                                "Excluding {count} disease pathways".format(
                                    count=str(len(
                                        self._get_disease_pathways()))))
                            pathways_to_exclude = self._get_disease_pathways()

                        reactome_link = result_converter.submit_result_to_reactome(
                            result=analysis_result,
                            result_type=reactome_type,
                            reactome_blueprint=reactome_blueprint,
                            min_p=0.05,
                            reactome_server=reactome_server,
                            excluded_pathways=pathways_to_exclude)
                        analysis_result.reactome_links.append(reactome_link)
                    except Exception as e:
                        # simply ignore this error
                        LOGGER.warning(
                            "Failed to submit result to Reactome: " + str(e))

            # save the result
            storage = self._get_storage()
            storage.set_result(analysis_identifier=request.analysis_id,
                               result=json.dumps(analysis_result.to_dict()))
            # update the status
            self._set_status(request.analysis_id,
                             status="complete",
                             description="Analysis done",
                             completed=1)
            self._acknowledge_message(ch, method)

            # send the request to create the report
            if request.parameter_dict.get("create_reports", "False").lower() == "true" or \
               len(request.parameter_dict.get("email", "")) > 3:
                message_mq = ReactomeMQ(queue_name=REPORT_QUEUE)
                report_request_obj = report_request.ReportRequest(
                    analysis_id=request.analysis_id,
                    user_mail=request.parameter_dict.get("email", None),
                    include_interactors=use_interactors,
                    include_disease=include_disease)
                message_mq.post_analysis(analysis=report_request_obj.to_json(),
                                         method="report")

            # count the complete analysis
            COMPLETED_ANALYSES.inc()
        except Exception as e:
            self._set_status(request.analysis_id,
                             status="failed",
                             description="Failed to analyse dataset: " +
                             str(e),
                             completed=1)
            self._acknowledge_message(ch, method)
            if self.debug:
                raise e
Example #11
    def test_no_design(self):
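        """Requests without an experimental design are valid for ssGSEA; the
        result then contains one score column per sample."""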
        test_json = """
                        {
                  "analysisId": "test_01",
                  "datasets": [
                    {
                      "data": "\\tSample 1\\tSample2\\tSample 3\\nCD19\\t10\\t20\\t2\\nMS4A1\\t10\\t20\\t2\\n\
                      MITF\\t10\\t0\\t0\\n",
                      "name": "First experiment",
                      "type": "rnaseq_counts"
                    }
                  ],
                  "methodName": "ssgsea"
                }
                """
        json_obj = json.loads(test_json)
        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        self.assertIsNotNone(request)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(gene_set, identifier_mapping=mappings,
                                                         identifiers=request.datasets[0].df[:][
                                                             gene_id_colname].tolist())

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(request=request,
                                          gene_set_mappings={request.datasets[0].name: gene_set_mapping},
                                          identifier_mappings=mappings,
                                          gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)
        self.assertIsNotNone(result[0].fold_changes)

        # test the actual result
        reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        required_fields = ["Pathway", "Sample_1", "Sample2", "Sample_3"]
        for required_field in required_fields:
            self.assertTrue(required_field in reader.fieldnames, "Missing required field " + required_field)

        # test the pathways
        found_pathways = 0

        for pathway in reader:
            found_pathways += 1

            if pathway["Pathway"] == "R-HSA-1280218":
                self.assertEqual("0.0", pathway["Sample_1"].strip())
                self.assertEqual("0.02880908", pathway["Sample2"].strip())
                self.assertEqual("0.02880908", pathway["Sample_3"].strip())

            if pathway["Pathway"] == "R-HSA-392499":
                self.assertEqual(-0.5, float(pathway["Sample_1"]))
                self.assertEqual(-0.5, float(pathway["Sample2"]))
                self.assertEqual(-0.5, float(pathway["Sample_3"]))

        self.assertEqual(143, found_pathways)