Exemplo n.º 1
0
    def test_failed_mapping(self):
        # Queries the mapping service with the placeholder names "Gene1"/"Gene2"
        # and expects an empty result, first for two genes and then for a
        # single gene.
        for placeholder_genes in ({"Gene1", "Gene2"}, {"Gene1"}):
            lookup = util.map_identifiers(placeholder_genes, return_all=True)

            self.assertEqual(0, len(lookup))
Exemplo n.º 2
0
    def test_ssgsea(self):
        # Runs ReactomeGSVARAnalyser on the request stored in self.test_json and
        # checks the header, two specific pathway rows and the number of rows
        # of the tab-delimited pathway result.
        json_obj = json.loads(self.test_json)
        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        # the first named column of the dataset is used as the identifier column
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=request.datasets[0].df[:][gene_id_colname].tolist())

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={request.datasets[0].name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)
        self.assertIsNotNone(result[0].fold_changes)

        # test the actual result
        reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        required_fields = ["Pathway", "Name", "Sample.1", "Sample.2", "Sample.3"]
        for required_field in required_fields:
            # assertIn reports the missing name and the actual header on failure
            self.assertIn(required_field, reader.fieldnames)

        # test the pathways
        found_pathways = 0
        found_p1 = False
        found_p2 = False

        for pathway in reader:
            found_pathways += 1

            if pathway["Pathway"] == "R-HSA-1280218":
                found_p1 = True
                # compared as text, so the exact printed representation matters
                self.assertEqual("0.0", pathway["Sample.1"].strip())
                self.assertEqual("0.02880908", pathway["Sample.2"].strip())
                self.assertEqual("0.02880908", pathway["Sample.3"].strip())

            if pathway["Pathway"] == "R-HSA-392499":
                found_p2 = True
                self.assertEqual(-0.5, float(pathway["Sample.1"]))
                self.assertEqual(-0.5, float(pathway["Sample.2"]))
                self.assertEqual(-0.5, float(pathway["Sample.3"]))

        self.assertEqual(143, found_pathways)

        # both reference pathways must have been present in the result
        self.assertTrue(found_p1)
        self.assertTrue(found_p2)
Exemplo n.º 3
0
    def test_no_design_filtering(self):
        # Converts a three-gene request and filters its dataset while passing
        # None in the design position; two of the three rows are expected to
        # remain afterwards.
        request_json = """
                        {
                  "analysisId": "test_01",
                  "datasets": [
                    {
                      "data": "\\tSample 1\\tSample2\\tSample 3\\nCD19\\t10\\t20\\t2\\nMS4A1\\t10\\t20\\t2\\n\
                      MITF\\t10\\t0\\t0\\n",
                      "design": {
                        "analysisGroup": [
                          "Treatment",
                          "Control",
                          "Treatment"
                        ],
                        "comparison": {
                          "group1": "Control",
                          "group2": "Treatment"
                        },
                        "samples": [
                          "Sample 1",
                          "Sample 2",
                          "Sample 3"
                        ],
                        "patient": [
                          "Patient 1",
                          "Patient 2",
                          "Patient 3"
                       ]
                      },
                      "name": "First experiment",
                      "type": "rnaseq_counts"
                    }
                  ],
                  "methodName": "ssgsea"
                }
                """

        worker_cls = reactome_analysis_worker.ReactomeAnalysisWorker
        worker = worker_cls()

        request_obj = create_analysis_input_object(json.loads(request_json))
        worker._convert_datasets(request_obj)
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        # all three submitted rows are present before filtering
        converted_df = request_obj.datasets[0].df
        self.assertEqual(3, len(converted_df))

        filtered_df = worker_cls._filter_dataset(converted_df, mappings, None, 0.5)

        self.assertIsNotNone(filtered_df)
        self.assertEqual(2, len(filtered_df))
    def _map_identifiers(self, request: AnalysisInput,
                         reactome_server: str) -> dict:
        """
        Map all submitted identifiers using Reactome's mapping service.

        :param request: The analysis request
        :param reactome_server: The reactome server to use
        :returns: A dict with the original identifier as key and the mappings as value (list)
        :raises Exception: If at most one identifier was submitted, if the
                           mapping call failed, or if no identifier was mapped.
        """
        # get all identifiers
        all_identifiers = ReactomeAnalysisWorker._extract_identifiers(
            request.datasets)

        # make sure more than one gene was submitted
        if len(all_identifiers) <= 1:
            LOGGER.debug(
                "Analysis request %s contains an insufficient number of genes (%s)",
                request.analysis_id, len(all_identifiers))
            raise Exception("Analysis requires >1 genes.")

        # get the identifier mappings
        self._set_status(request.analysis_id,
                         status="running",
                         description="Mapping identifiers...",
                         completed=0.1)

        try:
            identifier_mappings = util.map_identifiers(
                all_identifiers,
                return_all=True,
                reactome_server=reactome_server)
        except util.MappingException as e:
            LOGGER.debug("Identifier mapping failed", exc_info=True)
            # chain explicitly so the original failure stays attached as the cause
            raise Exception("Invalid gene/protein identifiers submitted") from e
        except Exception as e:
            LOGGER.error("Failed to connect to mapping service: %s", e)
            LOGGER.debug("Mapping failed", exc_info=True)
            raise Exception(
                "Failed to contact identifier mapping service. Please try again later."
            ) from e

        LOGGER.debug("Mapped %s of %s submitted identifiers",
                     len(identifier_mappings), len(all_identifiers))

        # make sure that identifiers were mapped
        if len(identifier_mappings) < 1:
            raise Exception("Failed to map any submitted identifiers")

        return identifier_mappings
Exemplo n.º 5
0
    def testMapping(self):
        # Maps every identifier listed in testfiles/R-HSA-1980143.uniprot.txt
        # and checks that each of them got a mapping with fewer than three
        # targets.

        # use all genes from one pathway
        uniprot_file = os.path.join(os.path.dirname(__file__), "testfiles",
                                    "R-HSA-1980143.uniprot.txt")
        with open(uniprot_file) as reader:
            # one identifier per line; the set removes duplicate lines
            genes = {line.strip() for line in reader}

        mapped_identifiers = util.map_identifiers(genes, return_all=True)

        self.assertEqual(len(genes), len(mapped_identifiers))

        # all identifiers should only map to a single one
        for mapped_identifier in mapped_identifiers.values():
            # There are multiple mappings when referring to isoforms (one case)
            self.assertLess(len(mapped_identifier), 3,
                            msg="Multiple mappings for {}".format(
                                ",".join(mapped_identifier)))
Exemplo n.º 6
0
    def test_analysis(self):
        # Filters the dataset from self.test_json, runs ReactomeRAnalyser on it
        # and checks the size and the header of the tab-delimited pathway
        # result.
        json_obj = json.loads(self.test_json)
        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        # get the mappings
        mappings = util.map_identifiers({
            "MITF", "CD19", "MS4A1", "SDC1", "CD38", "EGFR", "IL10", "IL6",
            "GRB2", "GAB1", "SHC1"
        })

        # filter the dataset
        request.datasets[0].df = ReactomeAnalysisWorker._filter_dataset(
            request.datasets[0].df, mappings, request.datasets[0].design, 1)

        gene_set = self._get_gene_set()
        # the first named column of the dataset is used as the identifier column
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=request.datasets[0].df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={request.datasets[0].name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)

        result_lines = result[0].pathways.split("\n")
        self.assertEqual(233, len(result_lines))

        reader = csv.DictReader(result_lines, delimiter="\t")
        required_fields = ("Pathway", "Name", "Direction", "FDR", "PValue",
                           "NGenes")
        for field in required_fields:
            # assertIn also shows the actual header when a column is missing
            self.assertIn(field, reader.fieldnames, "Missing field " + field)
Exemplo n.º 7
0
    def test_pathway_string(self):
        # Passes two pathway ids through the "pathways" parameter and checks
        # that the ReactomeGSVARAnalyser result contains exactly two rows.
        request_json = json.loads(self.test_json)

        # add the parameters
        request_json["parameters"] = [
            {"name": "pathways", "value": "R-HSA-1280218,R-HSA-392499"},
            {"name": "create_reactome_visualization", "value": "False"},
        ]

        request = create_analysis_input_object(request_json)
        dataset = request.datasets[0]
        dataset.df = util.string_to_array(dataset.data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        id_column = dataset.df.dtype.names[0]
        submitted_ids = dataset.df[:][id_column].tolist()
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set, identifier_mapping=mappings, identifiers=submitted_ids)

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={dataset.name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        first_result = result[0]
        self.assertIsNotNone(first_result.pathways)
        self.assertIsNotNone(first_result.fold_changes)

        # test the actual result
        reader = csv.DictReader(first_result.pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        # there should only be two entries
        n_entries = sum(1 for _ in reader)
        self.assertEqual(2, n_entries)
Exemplo n.º 8
0
    def test_heartbeat(self):
        # Registers self.update_heartbeat as the analyser's heartbeat callback
        # and checks that self.last_heartbeat is later than the recorded start
        # time once the analysis has run.
        request_json = json.loads(self.test_json)
        request_json["parameters"].append(
            {"name": "max_missing_values", "value": "1"})

        # remove the patient since this coefficient cannot be estimated
        request_json["datasets"][0]["design"].pop("patient")

        request = create_analysis_input_object(request_json)
        dataset = request.datasets[0]
        dataset.df = util.string_to_array(dataset.data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        # filter the dataset
        dataset.df = ReactomeAnalysisWorker._filter_dataset(
            dataset.df, mappings, dataset.design, 1)

        gene_set = self._get_gene_set()
        id_column = dataset.df.dtype.names[0]
        submitted_ids = dataset.df[:][id_column].tolist()
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set, identifier_mapping=mappings, identifiers=submitted_ids)

        analyser = ReactomeRAnalyser()
        analyser.set_heartbeat_callback(self.update_heartbeat)
        # reference point one second in the past, truncated to whole seconds
        started_at = int(time.time()) - 1

        # only the callback side effect is checked; the return value is not used
        analyser.analyse_request(
            request=request,
            gene_set_mappings={dataset.name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # make sure the heartbeat was updated
        self.assertGreater(self.last_heartbeat, started_at)
Exemplo n.º 9
0
    def test_parameter_passing(self):
        # Adds a "max_missing_values" parameter to the request from
        # self.test_json, checks that it reaches the request object, then runs
        # ReactomeRAnalyser and verifies the pathway table and the fold-change
        # table.
        json_obj = json.loads(self.test_json)
        json_obj["parameters"].append({
            "name": "max_missing_values",
            "value": "1"
        })

        # remove the patient since this coefficient cannot be estimated
        json_obj["datasets"][0]["design"].pop("patient")

        request = create_analysis_input_object(json_obj)
        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        self.assertEqual(3, len(request.parameters))
        # default values inserted automatically
        self.assertEqual(6, len(request.parameter_dict))
        self.assertIn("max_missing_values", request.parameter_dict)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        # filter the dataset
        request.datasets[0].df = ReactomeAnalysisWorker._filter_dataset(
            request.datasets[0].df, mappings, request.datasets[0].design, 1)

        gene_set = self._get_gene_set()
        # the first named column of the dataset is used as the identifier column
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=request.datasets[0].df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={request.datasets[0].name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)

        result_lines = result[0].pathways.split("\n")
        self.assertEqual(24, len(result_lines))

        reader = csv.DictReader(result_lines, delimiter="\t")
        required_fields = ("Pathway", "Name", "Direction", "FDR", "PValue",
                           "NGenes")
        for field in required_fields:
            self.assertIn(field, reader.fieldnames, "Missing field " + field)

        # pathways that are asserted to be reported as "Down" below
        pathways_down = ("R-HSA-392499", "R-HSA-597592", "R-HSA-2990846",
                         "R-HSA-3108232", "R-HSA-3232118")
        # expected pathway id at selected reader line numbers
        expected_at_line = {
            2: "R-HSA-392499",
            6: "R-HSA-3232118",
            15: "R-HSA-162582",
            24: "R-HSA-6811558",
        }

        for row in reader:
            expected_pathway = expected_at_line.get(reader.line_num)
            if expected_pathway is not None:
                self.assertEqual(expected_pathway, row["Pathway"])

            if row["Pathway"] in pathways_down:
                self.assertEqual("Down", row["Direction"])
                self.assertLess(
                    float(row["av_foldchange"]), 0,
                    "Incorrect regulation for " + row["Pathway"])
            else:
                self.assertEqual("Up", row["Direction"])
                self.assertGreater(float(row["av_foldchange"]), 0)

        # test the FC result
        self.assertIsNotNone(result[0].fold_changes)
        fc_lines = result[0].fold_changes.split("\n")
        self.assertEqual(4, len(fc_lines))

        fc_reader = csv.DictReader(fc_lines, delimiter="\t")
        fc_fields = ("logFC", "Identifier")

        for field in fc_fields:
            self.assertIn(field, fc_reader.fieldnames,
                          "Missing FC field " + field)

        mitf_found = False

        for row in fc_reader:
            if row["Identifier"] == "MITF":
                self.assertAlmostEqual(4.53, float(row["logFC"]), delta=0.01)
                mitf_found = True

        self.assertTrue(mitf_found, "Failed to find MITF in FC data")
Exemplo n.º 10
0
    def test_no_design(self):
        # Runs ReactomeGSVARAnalyser on a request whose dataset carries no
        # "design" entry and checks the header, two specific pathway rows and
        # the number of rows of the pathway result.
        test_json = """
                        {
                  "analysisId": "test_01",
                  "datasets": [
                    {
                      "data": "\\tSample 1\\tSample2\\tSample 3\\nCD19\\t10\\t20\\t2\\nMS4A1\\t10\\t20\\t2\\n\
                      MITF\\t10\\t0\\t0\\n",
                      "name": "First experiment",
                      "type": "rnaseq_counts"
                    }
                  ],
                  "methodName": "ssgsea"
                }
                """
        json_obj = json.loads(test_json)
        request = create_analysis_input_object(json_obj)

        # check the request before it is dereferenced for the first time
        self.assertIsNotNone(request)

        request.datasets[0].df = util.string_to_array(request.datasets[0].data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        # the first named column of the dataset is used as the identifier column
        gene_id_colname = request.datasets[0].df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=request.datasets[0].df[:][gene_id_colname].tolist())

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={request.datasets[0].name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)
        self.assertIsNotNone(result[0].fold_changes)

        # test the actual result
        reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        # the submitted headers "Sample 1"/"Sample 3" are expected to come back
        # as "Sample_1"/"Sample_3"
        required_fields = ["Pathway", "Sample_1", "Sample2", "Sample_3"]
        for required_field in required_fields:
            self.assertIn(required_field, reader.fieldnames,
                          "Missing required field " + required_field)

        # test the pathways
        found_pathways = 0

        for pathway in reader:
            found_pathways += 1

            if pathway["Pathway"] == "R-HSA-1280218":
                # compared as text, so the exact printed representation matters
                self.assertEqual("0.0", pathway["Sample_1"].strip())
                self.assertEqual("0.02880908", pathway["Sample2"].strip())
                self.assertEqual("0.02880908", pathway["Sample_3"].strip())

            if pathway["Pathway"] == "R-HSA-392499":
                self.assertEqual(-0.5, float(pathway["Sample_1"]))
                self.assertEqual(-0.5, float(pathway["Sample2"]))
                self.assertEqual(-0.5, float(pathway["Sample_3"]))

        self.assertEqual(143, found_pathways)
Exemplo n.º 11
0
    def test_interactor_mapping(self):
        # "MS4A1" is expected to yield exactly one entry whose single target
        # is "P11836".
        lookup = util.map_identifiers(["MS4A1"], return_all=True)
        self.assertEqual(1, len(lookup))

        targets = lookup["MS4A1"]
        self.assertEqual(1, len(targets))
        self.assertEqual("P11836", targets[0])